PyTorch Tutorial 4

  1. load the dataset
    1. transform the data into tensors
  2. split the dataset into training, validation, and test sets
    1. define a function that shuffles the indices (the dataset is ordered, so without shuffling a split might contain only one label)
    2. create the samplers and loaders
  3. customise the MnistModel class
  4. define loss_batch
    1. calculate the loss for the current batch
  5. define evaluate
    1. calculate the average loss across batches
  6. define accuracy
    1. also called the metric; it reports the accuracy
  7. create the fit function
    1. epoch loop
      1. training loop
        1. loss_batch (for training)
      2. evaluate the result
      3. print the result
  8. call fit
from os import path
from random import randint

import torch
import torchvision
from torchvision.datasets import MNIST
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F

# torchvision.transforms is used to convert the MNIST images into tensors so that torch can work with them
import torchvision.transforms as transforms

# here the dataset is in its original (PIL image) format, which torch cannot work with directly
datasets = MNIST(root='./data', download=True)
# print(len(datasets))

test_dataset = MNIST(root='./data', train=False, transform=transforms.ToTensor())
# print(len(test_dataset))

# img, label = datasets[0]
# plt.imshow(img, cmap='gray')
# plt.show()

# print(label)

# here the dataset is already transformed into tensor
dataset = MNIST(root='./data', download=True, transform=transforms.ToTensor())

# the shape here is 1,28,28: channel, height, width
# img_tensor, label = dataset[0]
# print(img_tensor.shape, label)

# print(img_tensor[:, 10:15, 10:15])
# print(torch.max(img_tensor), torch.min(img_tensor))
# plt.imshow(img_tensor[0, 10:15, 10:15], cmap='gray')
# plt.show()

def split_indices(n, rate):
    # number of validation samples
    n_val = int(n * rate)
    # shuffled indices from 0 to n-1, with no repeats
    idxs = np.random.permutation(n)
    # return the last (n - n_val) indices and the first n_val indices,
    # i.e. the training indices and the validation indices
    return idxs[n_val:], idxs[:n_val]

train_indices, val_indices = split_indices(len(dataset), 0.2)
# print(len(train_indices), len(val_indices))
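
As a quick sanity check (illustrative only; the values 10 and 0.2 are made-up numbers, not part of the tutorial code), the helper can be run on a tiny dataset:

# illustrative only: split 10 indices with a 20% validation rate
tr_idx, va_idx = split_indices(10, 0.2)
print(len(tr_idx), len(va_idx))   # 8 2
print(va_idx)                     # e.g. [3 7]; random, different on every run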

# the sampler randomly picks indices from the given index list, and the loader groups them into batches of batch_size
# the reason for this is to reduce training time and computation,
# and to let multiple epochs be used to train the model; otherwise training would handle the whole dataset at once,
# which would occupy too much memory and put too much pressure on computational resources.
# this way, training is split into smaller chunks
batch_size = 100
train_sampler = SubsetRandomSampler(train_indices)
train_loder = DataLoader(dataset,
                         batch_size,
                         sampler=train_sampler)

val_sampler = SubsetRandomSampler(val_indices)
val_loder = DataLoader(dataset,
                       batch_size,
                       sampler=val_sampler)

input_size = 28 * 28
num_classes = 10

# model = nn.Linear(input_size, num_classes)

# print(model.weight.shape)
# print(model.bias.shape)
#
# print(model.weight)
# print(model.bias)

# for img, label in train_loder:
#     print(img.shape)
#     print(label)
#     # there is an error: each image has shape 1*28*28, but the linear layer expects an input of size 784
#     # so a customised model is needed
#     print(model(img))
#     break

class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        # define the input and output sizes for the linear layer
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        # reshaping with -1 avoids hard-coding the batch size; the first dimension is inferred
        xb = xb.reshape(-1, input_size)
        # pass the batch to the linear layer
        out = self.linear(xb)
        return out

model = MnistModel()

# the weight and bias now live inside the linear layer (model.linear.weight), not directly on the model (model.weight) as with the plain nn.Linear above
# print(model.linear.weight.shape)
# print(model.linear.bias.shape)
#
# print(model.linear.weight)
# print(model.linear.bias)

def accuracy(l1, l2):
    return torch.sum(l1 == l2).item() / len(l2)

Log plot presentation

# for img, label in train_loder:
#     # each img batch passed to the model has shape 100,1,28,28
#     # the output shape is 100,10
#     # which is what we expect (one score for each of the digits 0-9)
#     # softmax can be applied to turn the scores into the probability of each digit:
#     # probability_i = e^y_i / sum_j(e^y_j)
#     outputs = model(img)
#     # the second argument is the dimension along which softmax is applied,
#     # so 0 means down the columns and 1 means along the rows of a 2D matrix
#     probs = F.softmax(outputs, 1)
#     print(probs.shape)
#     # probs still has shape 100,10, but every value is now a probability (0-1) and each row sums to 1
#     print(outputs.shape)
#     print(outputs[0])
#     max_probs, predicted_labels = torch.max(probs, 1)
#     print(accuracy(predicted_labels, label))
#
#     # now we need to define the loss function
#     # cross entropy is the most suitable choice for logistic regression
#     # i.e. with a one-hot label y and predicted probabilities p, the loss is -sum_i y_i * ln(p_i),
#     # which reduces to -ln(p of the true class)
#     # e.g. the true label 9 is represented by the one-hot vector [0,0,0,0,0,0,0,0,0,1];
#     # if the model predicts a probability of 0.9 for class 9,
#     # the cross entropy is -ln(0.9) ≈ 0.1, which is low
#
#     # but when the prediction is poor:
#     # the true label 1 is represented by the one-hot vector [0,1,0,0,0,0,0,0,0,0];
#     # if the predicted probability for class 1 is only 0.2,
#     # the cross entropy is -ln(0.2) ≈ 1.6, which is high
#
#     # cross entropy only considers the entry of the correct label and ignores the others,
#     # because their entries in the one-hot vector are 0
#
#     # so a low probability for the correct digit means a high cross entropy (loss), and vice versa
#
#     # define the loss for the current batch
#     loss = F.cross_entropy(outputs, label)
#
#     # since loss = -ln(probability of the correct class),
#     # the probability of the correct class is e^(-loss)
#     learn_rate = 0.001
#     optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate)
#     optimizer.step()
#     break
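
To make the softmax / cross-entropy relationship concrete, here is a small standalone sketch (illustrative only, not part of the tutorial code; the logit values are made up). It shows that F.cross_entropy works on raw outputs and that e^(-loss) recovers the predicted probability of the true class:

# illustrative sketch: softmax and cross entropy on one made-up sample
logits = torch.tensor([[1.0, 2.0, 0.5, 3.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]])  # shape 1,10
target = torch.tensor([3])                      # assume the true digit is 3

probs = F.softmax(logits, dim=1)                # every row sums to 1
loss = F.cross_entropy(logits, target)          # note: takes raw logits, not probs

print(probs.sum(dim=1))                         # tensor([1.])
print(loss)                                     # equals -ln(probs[0, 3])
print(torch.exp(-loss), probs[0, 3])            # the two values match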

def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    preds = model(xb)
    loss = loss_func(preds, yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    # metric is used for model evaluation
    metric_result = None
    if metric is not None:
        metric_result = metric(preds, yb)

    return loss.item(), len(xb), metric_result

def evaluate(model, loss_func, valid_dl, metric=None):
    with torch.no_grad():
        results = [loss_batch(model, loss_func, xb, yb, metric=metric)
                   for xb, yb in valid_dl]

        # separate the lists; use plural names so the metric argument is not shadowed
        losses, nums, metrics = zip(*results)
        total = np.sum(nums)
        avg_loss = np.sum(np.multiply(losses, nums)) / total
        avg_metric = None
        if metric is not None:
            avg_metric = np.sum(np.multiply(metrics, nums)) / total
    return avg_loss, total, avg_metric

# this version of accuracy takes the raw model outputs and replaces the earlier one that expected predicted labels
def accuracy(output, label):
    _, preds = torch.max(output, dim=1)
    return torch.sum(label == preds).item() / len(preds)

# avg_loss, total, val_acc = evaluate(model, F.cross_entropy, val_loder, metric=accuracy)
# print("Loss: {:.4f}, total:{:.4f}, Accuracy: {:.4f}".format(avg_loss, total, val_acc))

def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric=None):
    for epoch in range(epochs):
        for xb, yb in train_dl:
            loss, _, _ = loss_batch(model, loss_fn, xb, yb, opt, metric=metric)

        result = evaluate(model, loss_fn, valid_dl, metric=metric)
        val_loss, total, val_metric = result

        if metric is None:
            print("Epoch [{}/{}], total:{:.4f}, Loss: {:.4f}"
                  .format(epoch + 1, epochs, total, val_loss))
        else:
            print("Epoch [{}/{}], total:{:.4f}, Loss: {:.4f}, {}: {:.4f}"
                  .format(epoch + 1, epochs, total, val_loss, metric.__name__, val_metric))

model = MnistModel()

# if the saved weights already exist, load them instead of retraining
if path.exists('mnist-logistic.pth'):
    model.load_state_dict(torch.load('mnist-logistic.pth'))

else:
    fit(5,
        model,
        F.cross_entropy,
        torch.optim.SGD(model.parameters(), lr=0.001),
        train_loder,
        val_loder,
        metric=accuracy)
    # this saves the weights and biases of the model
    torch.save(model.state_dict(), 'mnist-logistic.pth')

# load the saved weights into a fresh model instance
# model2 = MnistModel()
# model2.load_state_dict(torch.load('mnist-logistic.pth'))
# model2.state_dict()

def prediction_img(img, model):
    xb = img.unsqueeze(0)
    yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return preds[0].item()

for i in range(10):
    img, label = test_dataset[randint(0, len(test_dataset) - 1)]
    img_np = np.array(img)
    plt.imshow(img_np.squeeze(), cmap='gray')
    plt.show()
    print(prediction_img(img, model))

Questions

  1. loading test_dataset without the transform parameter made the validation section fail, because the image was still a PIL image and had no squeeze/unsqueeze method
  2. zip(*results) unpacks the list of per-batch tuples into separate sequences that can be assigned to multiple variables (see the sketch below)
  3. in avg_loss = np.sum(np.multiply(losses, nums)) / total, multiply is used to weight each batch loss by its batch size, because the last batch may not be the same size as the previous ones (see the sketch below)
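
A minimal sketch of points 2 and 3 (illustrative only; the batch losses, sizes, and metric values below are made up):

import numpy as np

# each tuple is (batch loss, batch size, batch metric); the last batch is smaller
results = [(0.52, 100, 0.81), (0.48, 100, 0.84), (0.50, 40, 0.80)]

# zip(*results) unpacks the list of tuples into three separate sequences
losses, nums, metrics = zip(*results)

# a plain mean would over-weight the small last batch,
# so each batch loss is weighted by its batch size instead
total = np.sum(nums)
avg_loss = np.sum(np.multiply(losses, nums)) / total
print(losses, nums, metrics)
print(avg_loss)   # 0.5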