CheatSheet | PyTorch


Introduction

  1. Dataset

  2. DataLoader

  3. Model

  4. Train

  5. Data Parallelism



1. Dataset

  • Subclass torch.utils.data.Dataset and implement __len__ and __getitem__ so a DataLoader can consume your data
import torch.utils.data as data_utl

class Dataset(data_utl.Dataset):
    def __init__(self):
        # Read data and preprocess input features with labels
        self.features = []
        self.labels = []

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]
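
  • Usage sketch (not in the original cheat sheet): once features and labels are filled in (random placeholder tensors here), the Dataset can be indexed directly
import torch

dataset = Dataset()
dataset.features = torch.randn(1000, 256)       # 1000 samples with 256 features (placeholder data)
dataset.labels = torch.randint(0, 10, (1000,))  # 1000 random labels for 10 classes (placeholder data)

print(len(dataset))   # 1000
x, y = dataset[0]
print(x.shape, y)     # torch.Size([256]) and a scalar label tensor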


2. DataLoader

  • Wrap the Dataset in DataLoaders to get batching and shuffling; random_split carves out a validation set
import torch
from torch.utils.data import DataLoader, random_split

# Create Dataset using torch.utils.data.Dataset (the class defined above)
dataset = Dataset()
# Split Dataset into train and validation set (90% / 10%)
num_train = int(len(dataset) * 0.9)
train_set, val_set = random_split(dataset, [num_train, len(dataset) - num_train])
# train_loader, val_loader
batch_size = 192 * max(1, torch.cuda.device_count())  # the batch will be divided across the GPUs by DataParallel
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)  # no need to shuffle the validation set
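
  • A minimal sketch of consuming the loader (assuming the placeholder Dataset above): each iteration yields a batch of stacked features and labels
for inputs, labels in train_loader:
    print(inputs.shape, labels.shape)   # e.g. torch.Size([192, 256]) torch.Size([192])
    break                               # inspect only the first batch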


3. Model

  • Simple Linear Classifier
import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(256, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x
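
  • Quick forward-pass check (a sketch with a dummy batch): the classifier maps a (batch, 256) input to (batch, num_classes) logits
import torch

model = Classifier(num_classes=10)   # 10 classes is only an example
dummy = torch.randn(4, 256)          # batch of 4 samples, 256 input features
logits = model(dummy)
print(logits.shape)                  # torch.Size([4, 10])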


4. Train

  • Move the model (and each batch) to the GPU, then run the standard loop: forward pass, loss, backward pass, optimizer step
device = "cuda"

# Setup Model
model = senbcls.SenBert_Classifier(num_classes=len(config.CATEGORY_PRED1))
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)
model.to(device)
model.train()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
for epoch in range(config.TRAIN_EPOCHS):
    # train model
    start = time.time()
    for step, batch in enumerate(train_loader):
        inputs, labels = batch 
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
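
  • Validation sketch (not part of the original): after (or inside) the epoch loop, evaluate on val_loader with gradients disabled
model.eval()                                    # switch off dropout / batchnorm updates
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = model(inputs).argmax(dim=1)     # predicted class per sample
        correct += (preds == labels).sum().item()
        total += labels.size(0)
print(f"validation accuracy: {correct / total:.3f}")
model.train()                                   # back to training mode if you keep training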


5. Data Parallelism

  • nn.DataParallel : you can use multiple GPUs simply by wrapping your model in nn.DataParallel, which performs the following steps:

    1. nn.parallel.replicate(module, device_ids) : your model is replicated onto each device

    2. nn.parallel.scatter(input, device_ids) : each batch from your dataloader is split across the GPUs => batch_size / num_gpu per device

    3. nn.parallel.gather(outputs, output_device) : the outputs of the replicated models are gathered back onto the output device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = myModel()
# multiple gpus => model to DataParallel
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)
model.to(device)
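
  • Saving note (based on standard DataParallel behavior): the wrapper stores the original model under model.module, so save that state_dict to get weights that load without the wrapper
# save weights without the DataParallel wrapper so they load on a single GPU / CPU
state_dict = model.module.state_dict() if isinstance(model, torch.nn.DataParallel) else model.state_dict()
torch.save(state_dict, "classifier.pt")   # "classifier.pt" is just an example filename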