Introduction
- Dataset
- DataLoader
- Model
- Train
- Data Parallelism
1. Dataset
- Define a custom Dataset (subclassing torch.utils.data.Dataset) that reads the data and exposes preprocessed features and labels
import torch.utils.data as data_utl

class Dataset(data_utl.Dataset):
    def __init__(self):
        # Read data and preprocess input features with labels
        self.features = []
        self.labels = []

    def __len__(self):
        # Number of samples in the dataset
        return len(self.labels)

    def __getitem__(self, idx):
        # Return one (feature, label) pair
        return self.features[idx], self.labels[idx]
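As a quick sanity check, here is a minimal usage sketch; the toy 256-dimensional features and binary labels below are assumptions purely for illustration (matching the input width of the classifier in section 3):

import torch

ds = Dataset()
ds.features = [torch.randn(256) for _ in range(4)]  # hypothetical toy features
ds.labels = [0, 1, 0, 1]                             # hypothetical toy labels

print(len(ds))     # 4, via __len__
x, y = ds[2]       # one (feature, label) pair, via __getitem__
print(x.shape, y)  # torch.Size([256]) 0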
2. DataLoader
- Split the Dataset into train/validation sets with random_split and wrap each split in a DataLoader
import torch
from torch.utils.data import DataLoader, random_split

import dataset  # module containing the Dataset class from section 1

# Create Dataset using torch.utils.data.Dataset
dataset = dataset.Dataset()

# Split Dataset into train and validation sets
num_train = int(len(dataset) * 0.9)
train_set, val_set = random_split(dataset, [num_train, len(dataset) - num_train])

# train_loader, val_loader
batch_size = 192 * max(torch.cuda.device_count(), 1)  # the batch will be divided across the GPUs
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=True)
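To see what each training step receives, a short sketch of iterating the loader; the default collate function stacks individual samples, so with 256-dimensional features each batch is a (batch_size, 256) tensor of inputs plus a (batch_size,) tensor of labels:

for inputs, labels in train_loader:
    print(inputs.shape, labels.shape)  # e.g. torch.Size([192, 256]) torch.Size([192])
    break                              # only inspect the first batch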
3. Model
- A simple fully-connected classifier (stacked Linear layers with ReLU activations)
import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self, num_classes):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(256, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 32)
        self.fc5 = nn.Linear(32, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x
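A minimal forward-pass sketch to confirm the shapes; the 10-class output and batch size of 8 are assumed values for illustration, and the 256-wide input must match fc1:

import torch

model = Classifier(num_classes=10)  # 10 classes is an assumed value
dummy = torch.randn(8, 256)         # (batch_size, feature_dim)
logits = model(dummy)
print(logits.shape)                 # torch.Size([8, 10])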
4. Train
- Train the classifier with CrossEntropyLoss and Adam, wrapping the model in nn.DataParallel when more than one GPU is available
device = "cuda"
# Setup Model
model = senbcls.SenBert_Classifier(num_classes=len(config.CATEGORY_PRED1))
if torch.cuda.device_count() > 1:
model = torch.nn.DataParallel(model)
model.to(device)
model.train()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=config.LEARNING_RATE)
for epoch in range(config.TRAIN_EPOCHS):
# train model
start = time.time()
for step, batch in enumerate(train_loader):
inputs, labels = batch
inputs, labels = inputs.to(device), labels.to(device)
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
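The loop above only covers the training pass; a minimal validation sketch using the val_loader from section 2 could look like the following (accuracy as the metric is an assumption, not something specified above):

model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        preds = model(inputs).argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
print(f"val accuracy: {correct / total:.4f}")
model.train()  # switch back before the next training epoch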
5. Data Parallelism
- nn.DataParallel: You can simply use multiple GPUs with nn.DataParallel, which performs the following steps (sketched manually right after this list):
  - nn.parallel.replicate(module, device_ids): your model is replicated on each device
  - nn.parallel.scatter(input, device_ids): the batch from your DataLoader is divided across the GPUs => batch_size / num_gpus per device
  - nn.parallel.gather(outputs, output_device): the outputs from the replicated models on each device are gathered onto the output device
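As an illustration of those steps, here is a hedged sketch of doing the replicate/scatter/gather by hand; it assumes at least two CUDA devices and reuses the Classifier from section 3 (num_classes=10 is an assumed value), and it uses nn.parallel.parallel_apply, the extra call nn.DataParallel runs in between scatter and gather:

import torch
import torch.nn.parallel as parallel

device_ids = list(range(torch.cuda.device_count()))   # assumes >= 2 GPUs
module = Classifier(num_classes=10).to("cuda:0")      # model must start on device_ids[0]
inputs = torch.randn(192, 256, device="cuda:0")       # one full batch

replicas = parallel.replicate(module, device_ids)     # copy the model onto every GPU
chunks = parallel.scatter(inputs, device_ids)         # split the batch: 192 / num_gpus per GPU
outputs = parallel.parallel_apply(replicas, chunks)   # run each replica on its own chunk
result = parallel.gather(outputs, device_ids[0])      # collect all outputs back on GPU 0
print(result.shape)                                   # torch.Size([192, 10])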
- In code, this only requires wrapping the model before moving it to the device:

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = myModel()

# multiple gpus => wrap the model in DataParallel
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)
model.to(device)
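One practical follow-up (an assumption about a typical workflow, not stated above): nn.DataParallel wraps the original model, so its parameters live under model.module; unwrapping before saving keeps the checkpoint keys free of the "module." prefix and loadable without the wrapper:

# Unwrap before saving so the state_dict keys are not prefixed with "module."
to_save = model.module if isinstance(model, torch.nn.DataParallel) else model
torch.save(to_save.state_dict(), "classifier.pt")  # "classifier.pt" is an arbitrary filename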