Neural Network and Deep Learning with PyTorch (TensorBoard and Hyperparameter Tuning)
In the previous post, we achieved an accuracy of around 61%. In this exercise, we will try to increase the accuracy through hyperparameter tuning with TensorBoard.
For this exercise, we will use the CIFAR10 dataset. It has the classes: ‘airplane’, ‘automobile’, ‘bird’, ‘cat’, ‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’. The images in CIFAR-10 are of size 3x32x32, i.e. 3-channel color images of 32x32 pixels in size.
The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class.
pip install tensorboardX
pip install tensorboard
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from tensorboardX import SummaryWriter
from itertools import product
# Report the installed torch / torchvision versions for reproducibility.
for installed_version in (torch.__version__, torchvision.__version__):
    print(installed_version)
# Pipeline applied to every image: PIL -> tensor, then scale each RGB
# channel from [0, 1] to [-1, 1] via (x - 0.5) / 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 train/test splits, downloaded to ./data on first run.
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=4, shuffle=True)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=4, shuffle=False)
def get_num_correct(preds, labels):
    """Return how many rows of `preds` (logits, shape [N, C]) have their
    argmax equal to the corresponding entry of `labels`."""
    predicted_classes = preds.argmax(dim=1)
    return int((predicted_classes == labels).sum())
class Network(nn.Module):
    """LeNet-style CNN for CIFAR-10: two conv+pool stages followed by a
    three-layer fully connected head producing 10 raw class logits."""

    def __init__(self):
        super().__init__()
        # Feature extractor: 3x32x32 -> 6x28x28 -> pool -> 6x14x14
        #                    -> 16x10x10 -> pool -> 16x5x5
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head over the flattened 16*5*5 feature map.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.out = nn.Linear(in_features=84, out_features=10)

    def forward(self, t):
        # Stage 1: conv -> relu -> 2x2 max pool; output shape (6, 14, 14).
        t = F.max_pool2d(F.relu(self.conv1(t)), kernel_size=2, stride=2)
        # Stage 2: conv -> relu -> 2x2 max pool; output shape (16, 5, 5).
        t = F.max_pool2d(F.relu(self.conv2(t)), kernel_size=2, stride=2)
        # Flatten and run the fully connected head.
        t = F.relu(self.fc1(t.reshape(-1, 16 * 5 * 5)))
        t = F.relu(self.fc2(t))
        # No softmax here: F.cross_entropy expects raw logits.
        return self.out(t)
# Hyperparameter grid to sweep over.
parameters = dict(
    lr=[.01, .001],
    batch_size=[100, 1000],
    shuffle=[True, False],
)
param_values = list(parameters.values())

# Preview every (lr, batch_size, shuffle) combination in the grid.
for lr, batch_size, shuffle in product(*param_values):
    print(lr, batch_size, shuffle)
We will train the network for each combination for 5 epochs.
# Train a fresh network for 5 epochs on every hyperparameter combination,
# logging each run to TensorBoard under a comment encoding the config.
for lr, batch_size, shuffle in product(*param_values):
    comment = f' batch_size={batch_size} lr={lr} shuffle={shuffle}'
    network = Network()
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=shuffle)
    optimizer = optim.Adam(network.parameters(), lr=lr)

    # Log a sample image grid and the model graph once per run.
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)
    tb = SummaryWriter(comment=comment)
    tb.add_image('images', grid)
    tb.add_graph(network, images)

    for epoch in range(5):
        total_loss = 0
        total_correct = 0
        for batch in train_loader:
            images, labels = batch                  # Get Batch
            preds = network(images)                 # Pass Batch
            loss = F.cross_entropy(preds, labels)   # Mean loss over this batch
            optimizer.zero_grad()                   # Zero Gradients
            loss.backward()                         # Calculate Gradients
            optimizer.step()                        # Update Weights
            # Weight by the actual batch length rather than `batch_size`:
            # the final batch can be smaller, and multiplying the mean loss
            # by a full batch size would overstate the epoch total.
            total_loss += loss.item() * images.shape[0]
            total_correct += get_num_correct(preds, labels)

        # Per-epoch scalars plus weight/gradient histograms.
        tb.add_scalar('Loss', total_loss, epoch)
        tb.add_scalar('Number Correct', total_correct, epoch)
        tb.add_scalar('Accuracy', total_correct / len(train_set), epoch)
        for name, param in network.named_parameters():
            tb.add_histogram(name, param, epoch)
            tb.add_histogram(f'{name}.grad', param.grad, epoch)
        print("epoch", epoch, "total_correct:", total_correct, "loss:", total_loss)
    tb.close()
%load_ext tensorboard
%tensorboard --logdir runs
Hyperparameter Tuning
We get the highest accuracy with batch size = 100, learning rate = 0.001.
We get the least loss with batch size = 100, learning rate = 0.001.
We get the highest number of correct with batch size = 100, learning rate = 0.001
Next, we will train our model with the above hyperparameters for 20 epochs. The Shuffle should always be set to True
for a dataset that is to be used for training. For better results, try training for 30-40 epochs.
# Train a fresh network with the best hyperparameters found above
# (batch_size=100, lr=0.001, shuffle=True) for 20 epochs.
network = Network()
optimizer = optim.Adam(network.parameters(), lr=0.001)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=100, shuffle=True)

for epoch in range(20):
    total_correct = 0
    total_loss = 0
    for batch in train_loader:                  # Get batch
        images, labels = batch                  # Unpack the batch into images and labels
        preds = network(images)                 # Pass batch
        loss = F.cross_entropy(preds, labels)   # Mean loss over this batch
        optimizer.zero_grad()
        loss.backward()                         # Calculate gradients
        optimizer.step()                        # Update weights
        # BUG FIX: the original multiplied by the stale `batch_size`
        # variable left at 1000 by the tuning sweep, while this loader
        # uses batches of 100 — the reported loss was ~10x too large.
        # Use the actual batch length instead.
        total_loss += loss.item() * images.shape[0]
        total_correct += preds.argmax(dim=1).eq(labels).sum().item()
    print('epoch:', epoch, "total_correct:", total_correct, "loss:", total_loss)
print('>>> Training Complete >>>')
@torch.no_grad()
def get_all_preds(model, loader):
    """Run `model` over every batch in `loader` with gradients disabled
    and return all outputs concatenated into one tensor along dim 0."""
    # Seed with an empty tensor so an empty loader still yields a tensor.
    outputs = [torch.tensor([])]
    for batch in loader:
        images, _labels = batch
        outputs.append(model(images))
    return torch.cat(outputs, dim=0)
# Evaluate on the held-out test set: compare argmax predictions against
# the ground-truth labels.
test_preds = get_all_preds(network, test_loader)
actual_labels = torch.Tensor(test_set.targets)
correct_mask = test_preds.argmax(dim=1).eq(actual_labels)
preds_correct = int(correct_mask.sum())
print('total correct:', preds_correct)
print('accuracy:', preds_correct / len(test_set))
The model predicted the labels with 63% accuracy, which is not much better than what we achieved without hyperparameter tuning. Next, we will build a confusion matrix, which will show in which particular areas our model is performing poorly.
import itertools
import numpy as np
import matplotlib.pyplot as plt
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        # Convert raw counts to per-class rates (rows sum to 1).
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    # Draw the matrix with class names along both axes.
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Annotate every cell with its value, using white text on dark cells
    # and black text on light ones for readability.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for row, col in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        cell_color = "white" if cm[row, col] > thresh else "black"
        plt.text(col, row, format(cm[row, col], fmt),
                 horizontalalignment="center",
                 color=cell_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# Build the confusion matrix (true labels vs. predicted classes) and plot it.
predicted_classes = test_preds.argmax(dim=1)
cm = confusion_matrix(test_set.targets, predicted_classes)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
plt.figure(figsize=(10, 10))
plot_confusion_matrix(cm, classes)