PyTorch Tutorial 5

The reason to use a neural network is that the inner kernel of logistic regression is still linear. To break this linearity, the network can apply an activation function between layers, for instance ReLU.

In this case, we use ReLU as the activation function to classify the images, and the resulting accuracy is noticeably better than plain logistic regression.
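To see why simply stacking linear layers would not help, note that two nn.Linear layers with no activation in between collapse into a single linear map, while a ReLU in between breaks that. A minimal sketch of this idea (the layer sizes are arbitrary and chosen only for illustration):

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(4, 784)      # a fake batch of flattened 28x28 images
lin1 = nn.Linear(784, 32)
lin2 = nn.Linear(32, 10)

# without an activation, the two layers are equivalent to one linear layer with composed weights
w = lin2.weight @ lin1.weight                    # shape (10, 784)
b = lin2.weight @ lin1.bias + lin2.bias          # shape (10,)
print(torch.allclose(lin2(lin1(x)), x @ w.t() + b, atol=1e-5))  # True

# with ReLU in between, the composition is no longer a single linear map
out = lin2(F.relu(lin1(x)))
print(out.shape)                                 # torch.Size([4, 10])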

from os import path, mkdir
from random import randint

import torch
import numpy as np
import torchvision
from matplotlib import pyplot as plt
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader
import torch.nn.functional as F
import torch.nn as nn

dataset = MNIST(root="./data", download=True, transform=ToTensor())
test_dataset = MNIST(root='./data', train=False, transform=ToTensor())

def split_indices(n, rate):
    # number of validation samples
    n_val = int(n * rate)
    # shuffled indices from 0 to n-1, with no repeats
    idxs = np.random.permutation(n)
    # return the indices after n_val and the first n_val indices,
    # i.e. the training indices and the validation indices
    return idxs[n_val:], idxs[:n_val]

train_indices, val_indices = split_indices(len(dataset), 0.2)

batch_size = 100
train_sampler = SubsetRandomSampler(train_indices)
train_loder = DataLoader(dataset,
                         batch_size,
                         sampler=train_sampler)

val_sampler = SubsetRandomSampler(val_indices)
val_loder = DataLoader(dataset,
                       batch_size,
                       sampler=val_sampler)

input_size = 28 * 28
num_classes = 10

class MnistModel(nn.Module):

    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()

        self.linear1 = nn.Linear(in_size, hidden_size)

        self.linear2 = nn.Linear(hidden_size, out_size)

    def forward(self, xb):
        # flatten
        xb = xb.view(xb.size(0), -1)
        # xb = xb.reshape(xb.size(0), -1)
        return self.linear2(F.relu(self.linear1(xb)))

# for t in model.parameters():
#     print(t.shape)

# for img, labels in train_loder:
#     outputs = model(img)
#     loss = F.cross_entropy(outputs, labels)
#     break

def get_device():
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')

def to_device(data, device):
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

# for img, label in train_loder:
#     print(img.shape)
#     img = to_device(img, device)
#     print(img.device)
#     break

class DeviceDataLoder():
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device

    def __iter__(self):
        # lazy loading: instead of moving the whole dataset onto the device at once,
        # move one batch at a time
        for b in self.dl:
            yield to_device(b, self.device)

    def __len__(self):
        return len(self.dl)

# use DeviceDataLoder as a wrapper
train_dl = DeviceDataLoder(train_loder, get_device())
valid_dl = DeviceDataLoder(val_loder, get_device())

def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    preds = model(xb)

    loss = loss_func(preds, yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    metric_result = None
    if metric is not None:
        metric_result = metric(preds, yb)

    return loss.item(), len(xb), metric_result

def evaluate(model, loss_func, valid_dl, metric=None):
    with torch.no_grad():
        results = [loss_batch(model, loss_func, xb, yb, metric=metric)
                   for xb, yb in valid_dl]

        # separate the lists (renamed to avoid shadowing the metric argument)
        losses, nums, metrics = zip(*results)
        total = np.sum(nums)
        avg_loss = np.sum(np.multiply(losses, nums)) / total
        avg_metric = None
        if metric is not None:
            avg_metric = np.sum(np.multiply(metrics, nums)) / total
    return avg_loss, total, avg_metric

def fit(epochs, lr, model, loss_func, train_dl, valid_dl, opt_fn=None, metric=None):
    if opt_fn is None:
        opt_fn = torch.optim.SGD
    opt = opt_fn(model.parameters(), lr=lr)
    loss_history = []
    metric_history = []

    for epoch in range(epochs):
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)
        result = evaluate(model, loss_func, valid_dl, metric)
        val_loss, total, val_metric = result

        loss_history.append(val_loss)
        metric_history.append(val_metric)

        if metric is not None:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {val_loss:.4f}, Metric: {val_metric:.4f}')
        else:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {val_loss:.4f}')

    return loss_history, metric_history

def accuracy(output, label):
    _, preds = torch.max(output, dim=1)
    return torch.sum(label == preds).item() / len(preds)

model = MnistModel(input_size, 32, num_classes)
to_device(model, get_device())

if path.exists('./tutorial5/mnist-logistic.pth'):
    model.load_state_dict(torch.load('./tutorial5/mnist-logistic.pth'))

else:
    loss_history, metric_history = fit(5, 0.5, model, F.cross_entropy,
                                       train_dl,
                                       valid_dl,
                                       opt_fn=torch.optim.SGD,
                                       metric=accuracy)
    # save the weights and biases of this model
    # create the directory first if it does not exist yet
    if not path.exists('./tutorial5'):
        mkdir('./tutorial5')
    torch.save(model.state_dict(), './tutorial5/mnist-logistic.pth')

def prediction_img(img, model):
    # move the image to the same device as the model before predicting
    xb = to_device(img.unsqueeze(0), get_device())
    yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return preds[0].item()

for i in range(10):
    img, label = test_dataset[randint(0, len(test_dataset) - 1)]
    img_np = np.array(img)
    plt.imshow(img_np.squeeze(), cmap='gray')
    plt.show()
    print(prediction_img(img, model))

PyTorch Tutorial 3

Simple linear regression with the built-in tools in PyTorch:

  1. generate prediction
  2. calculate the loss
  3. compute gradients of w and b
  4. adjust w and b
  5. reset gradients to zero

These five steps correspond to the loop inside the fit function below.

import numpy as np
import torch.nn as nn
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import torch.nn.functional as F

# temp, rainfall, humidity
# inputs = torch.tensor(np.random.uniform(0, 120, size=(15, 3)))
# the inputs and targets need an explicit dtype here; otherwise, when torch generates the prediction,
# it will run into a dtype mismatch
inputs = torch.tensor(np.array(
    [[109.4144, 11.2775, 32.4521], [2.0002, 47.0248, 49.9469], [27.1528, 57.8907, 91.2076],
     [44.8227, 71.6239, 64.0752], [66.0968, 92.5966, 94.0775], [59.6257, 76.9701, 92.1656],
     [8.1551, 1.7426, 10.5297], [112.6036, 47.2793, 95.4221], [3.2212, 61.8274, 115.9187],
     [35.0351, 110.6133, 66.6992], [8.8387, 21.8008, 50.0480], [68.7698, 59.9815, 12.0230],
     [111.3881, 90.3050, 62.1327], [101.7462, 115.7447, 33.4925], [27.7659, 54.5803, 105.3599]], dtype='float32'))

# apples, oranges
# targets = torch.tensor(np.random.uniform(0, 50, size=(15, 2)))
targets = torch.tensor(np.array(
    [[28.1090, 45.0061], [29.0839, 6.4205], [35.2633, 44.1196],
     [29.5371, 6.8457], [7.4298, 36.1434], [6.6296, 47.1809],
     [49.9750, 49.9321], [34.1796, 16.6732], [46.8875, 7.6084],
     [23.0442, 42.2229], [29.7401, 13.4199], [3.0854, 21.4550],
     [47.6801, 49.1518], [18.7320, 18.4418], [34.2725, 25.8721]], dtype='float32'))
# print(inputs)
# print(targets)

# TensorDataset pairs each input row with the corresponding target row
train_ds = TensorDataset(inputs, targets)

batch_size = 5
train_dl = DataLoader(train_ds, batch_size, shuffle=True)

# each batch holds 5 samples and the data are shuffled,
# but input/target pairs stay aligned; only the order changes
# for xb, yb in train_dl:
#     print("batch:")
#     print(xb)
#     print(yb)

# specify the number of input and output features
model = nn.Linear(3, 2)
# the weight and bias are initialised automatically, with requires_grad set to True
# print(model.weight)
# print(model.bias)
# print(list(model.parameters()))

# preds = model(inputs)
# print(preds)

loss_fn = F.mse_loss
loss = loss_fn(model(inputs), targets)
# print(loss)

opt = torch.optim.SGD(model.parameters(), lr=1e-5)

# 1 generate predictions
# 2 calculate the loss
# 3 compute gradients of w and b
# 4 adjust w and b
# 5 reset gradients to zero
# these 5 steps correspond to the loop in the next function

def fit(num_epochs, model, loss_fn, opt):
    # training iterations
    for epoch in range(num_epochs):
        # batches in each iteration
        for xb, yb in train_dl:
            pred = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
        if (epoch + 1) % 10 == 0:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, loss.item()))

fit(100, model, loss_fn, opt)

PyTorch Tutorial 4

  1. load the dataset
    1. transform the data into tensors
  2. split the dataset into training, testing, and validation sets
    1. define an index-shuffling function (the dataset is ordered; without shuffling, an individual split might contain only one label)
    2. create the samplers and loaders
  3. customise the MnistModel class
  4. define loss_batch
    1. calculate the loss for the current batch
  5. define evaluate
    1. calculate the average loss over the batches
  6. define accuracy
    1. also called the metric; it reports the accuracy
  7. create the fit function
    1. epoch loop
      1. training loop
        1. loss_batch (for training)
      2. evaluate the result
      3. print the result
  8. call fit
from os import path
from random import randint

import torch
import torchvision
from torchvision.datasets import MNIST
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F

# transforms is used to turn the MNIST dataset into tensors that torch can work with
import torchvision.transforms as transforms

# here the dataset is in its original format, which torch cannot work with directly
datasets = MNIST(root='./data', download=True)
# print(len(datasets))

test_dataset = MNIST(root='./data', train=False, transform=transforms.ToTensor())
# print(len(test_dataset))

# img, label = datasets[0]
# plt.imshow(img, cmap='gray')
# plt.show()

# print(label)

# here the dataset is already transformed into tensors
dataset = MNIST(root='./data', download=True, transform=transforms.ToTensor())

# the shape here is 1, 28, 28: channels, height, width
# img_tensor, label = dataset[0]
# print(img_tensor.shape, label)

# print(img_tensor[:, 10:15, 10:15])
# print(torch.max(img_tensor), torch.min(img_tensor))
# plt.imshow(img_tensor[0, 10:15, 10:15], cmap='gray')
# plt.show()

def split_indices(n, rate):
    # number of validation samples
    n_val = int(n * rate)
    # shuffled indices from 0 to n-1, with no repeats
    idxs = np.random.permutation(n)
    # return the indices after n_val and the first n_val indices,
    # i.e. the training indices and the validation indices
    return idxs[n_val:], idxs[:n_val]

train_indices, val_indices = split_indices(len(dataset), 0.2)
# print(len(train_indices), len(val_indices))

# the sampler randomly picks batch_size indices from the given list.
# the point is to cut training time and computation and to train over multiple epochs;
# otherwise training would process the whole dataset at once, which takes too much memory
# and puts too much pressure on computational resources.
# in this way the training is broken into smaller chunks
batch_size = 100
train_sampler = SubsetRandomSampler(train_indices)
train_loder = DataLoader(dataset,
                         batch_size,
                         sampler=train_sampler)

val_sampler = SubsetRandomSampler(val_indices)
val_loder = DataLoader(dataset,
                       batch_size,
                       sampler=val_sampler)

input_size = 28 * 28
num_classes = 10

# model = nn.Linear(input_size, num_classes)

# print(model.weight.shape)
# print(model.bias.shape)
#
# print(model.weight)
# print(model.bias)

# for img, label in train_loder:
#     print(img.shape)
#     print(label)
#     # there is an error here: the image shape is 1*28*28, but the model expects inputs of size 784,
#     # so a customised model is needed.
#     print(model(img))
#     break

class MnistModel(nn.Module):
    def __init__(self):
        super().__init__()
        # define the input and output sizes of the linear layer
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        # reshaping with -1 avoids hard-coding; the first dimension is inferred automatically
        xb = xb.reshape(-1, input_size)
        # pass the batch to the linear layer
        out = self.linear(xb)
        return out

model = MnistModel()

# the weight and bias live on the linear layer (model.linear.weight), not on the model itself (model.weight)
# print(model.linear.weight.shape)
# print(model.linear.bias.shape)
#
# print(model.linear.weight)
# print(model.linear.bias)

def accuracy(l1, l2):
    return torch.sum(l1 == l2).item() / len(l2)

(Figure: log plot presentation)

# for img, label in train_loder:
#     # the img passed into the model has shape 100, 1, 28, 28
#     # the output shape is 100, 10
#     # which is what we expect (one score for each digit 0-9)
#     # softmax can be applied here to turn each score into a probability:
#     # probability_i = e^y_i / sum_j(e^y_j)
#     outputs = model(img)
#     # the second parameter is the dimension softmax is applied along,
#     # so 0 means along columns and 1 along rows for a 2D matrix
#     probs = F.softmax(outputs, 1)
#     print(probs.shape)
#     # probs still has shape 100, 10, but every value is a probability (0-1) and each row sums to 1
#     print(outputs.shape)
#     print(outputs[0])
#     max_probs, predicted_labels = torch.max(probs, 1)
#     print(accuracy(predicted_labels, label))

# now we need to define the loss function
# cross entropy is the usual choice for logistic regression
# i.e.
# the true label 9 is represented by the one-hot vector [0,0,0,0,0,0,0,0,0,1]
# if the predicted probability for class 9 is, say, 0.9,
# the cross entropy is -ln(probability of the true class), i.e. -ln(0.9) = 0.10, which is low

# but when the prediction is poor:
# the true label 1 is represented by the one-hot vector [0,1,0,0,0,0,0,0,0,0]
# if the predicted probability for class 1 is only 0.2,
# the cross entropy is -ln(0.2) = 1.6, which is high

# cross entropy only considers the true class; the others are ignored because their one-hot entries are 0

# so a low probability on the correct digit gives a high cross entropy (loss), and vice versa

# define the loss for the current batch
# loss = F.cross_entropy(outputs, label)

# since loss = -ln(probability of the correct class),
# the probability of the correct class is e^(-loss)
# learn_rate = 0.001
# optimizer = torch.optim.SGD(model.parameters(), lr=learn_rate)
# optimizer.step()
# break
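# a quick numeric check of the worked cross-entropy example above (illustrative only)
print(-np.log(0.9))  # ~0.105: confident and correct prediction -> low loss
print(-np.log(0.2))  # ~1.609: low probability on the true class -> high loss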

def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    preds = model(xb)
    loss = loss_func(preds, yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    # metric is used for model evaluation
    metric_result = None
    if metric is not None:
        metric_result = metric(preds, yb)

    return loss.item(), len(xb), metric_result

def evaluate(model, loss_func, valid_dl, metric=None):
    with torch.no_grad():
        results = [loss_batch(model, loss_func, xb, yb, metric=metric)
                   for xb, yb in valid_dl]

        # separate the lists (renamed to avoid shadowing the metric argument)
        losses, nums, metrics = zip(*results)
        total = np.sum(nums)
        avg_loss = np.sum(np.multiply(losses, nums)) / total
        avg_metric = None
        if metric is not None:
            avg_metric = np.sum(np.multiply(metrics, nums)) / total
    return avg_loss, total, avg_metric

def accuracy(output, label):
    _, preds = torch.max(output, dim=1)
    return torch.sum(label == preds).item() / len(preds)

# avg_loss, total, val_acc = evaluate(model, F.cross_entropy, val_loder, metric=accuracy)
# print("Loss: {:.4f}, total:{:.4f}, Accuracy: {:.4f}".format(avg_loss, total, val_acc))

def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric=None):
    for epoch in range(epochs):
        for xb, yb in train_dl:
            loss, _, _ = loss_batch(model, loss_fn, xb, yb, opt, metric=metric)

        result = evaluate(model, loss_fn, valid_dl, metric=metric)
        val_loss, total, val_metric = result

        if metric is None:
            print("Epoch [{}/{}], total:{:.4f}, Loss: {:.4f}"
                  .format(epoch + 1, epochs, total, val_loss))
        else:
            print("Epoch [{}/{}], total:{:.4f}, Loss: {:.4f}, {}: {:.4f}"
                  .format(epoch + 1, epochs, total, val_loss, metric.__name__, val_metric))

model = MnistModel()

# load the saved weights if they already exist
if path.exists('mnist-logistic.pth'):
    model.load_state_dict(torch.load('mnist-logistic.pth'))

else:
    fit(5,
        model,
        F.cross_entropy,
        torch.optim.SGD(model.parameters(), lr=0.001),
        train_loder,
        val_loder,
        metric=accuracy)
    # this saves the weights and biases of the model
    torch.save(model.state_dict(), 'mnist-logistic.pth')

# read the saved model into a new instance
# model2 = MnistModel()
# model2.load_state_dict(torch.load('mnist-logistic.pth'))
# model2.state_dict()

def prediction_img(img, model):
    xb = img.unsqueeze(0)
    yb = model(xb)
    _, preds = torch.max(yb, dim=1)
    return preds[0].item()

for i in range(10):
    img, label = test_dataset[randint(0, len(test_dataset) - 1)]
    img_np = np.array(img)
    plt.imshow(img_np.squeeze(), cmap='gray')
    plt.show()
    print(prediction_img(img, model))

Question

  1. loading test_dataset without the transform parameter made the validation section fail, because the raw PIL image is not a tensor and has no squeeze method
  2. zip(*results) unpacks the list of tuples into separate sequences, which can then be assigned to multiple variables (see the small demo below)
  3. avg_loss = np.sum(np.multiply(loss, nums)) / total: the multiply weights each batch loss by its batch size, because the last batch may be smaller than the others
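A small demo of points 2 and 3, assuming three batches where the last one is smaller (the numbers are made up for illustration):

import numpy as np

# each tuple is (batch_loss, batch_size, batch_metric), as returned by loss_batch
results = [(0.50, 100, 0.80), (0.70, 100, 0.75), (0.40, 50, 0.90)]

# zip(*results) unpacks the list of tuples into three separate sequences
losses, nums, metrics = zip(*results)
print(losses)  # (0.5, 0.7, 0.4)
print(nums)    # (100, 100, 50)

# weighted average: each batch loss is weighted by its batch size,
# so the smaller last batch counts proportionally less
total = np.sum(nums)
avg_loss = np.sum(np.multiply(losses, nums)) / total
print(avg_loss)         # 0.56
print(np.mean(losses))  # 0.5333..., slightly different from the weighted value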

PyTorch Tutorial 1

import torch
import numpy as np

t1 = torch.tensor(4.)
print(t1)
print(t1.dtype)

t2 = torch.tensor([1., 2, 3, 4])
print(t2)
print(t2.dtype)
# in this case, all the data will be converted to the same data type
# [1., 2., 3., 4.]

t3 = torch.tensor([1., 2, 3, 4])
print(t3)
print(t3.dtype)

t4 = torch.tensor([[1, 2], [1., 4], [4, 3], [5, 6]])
print(t4)
print(t4.dtype)

print(t1.shape)
print(t2.shape)
print(t3.shape)
print(t4.shape)

# ---
x = torch.tensor(3., requires_grad=True)
w = torch.tensor(4., requires_grad=True)
b = torch.tensor(5., requires_grad=True)

y = w * x + b
print(y)
y.backward()

print(x.grad)
print(w.grad)
print(b.grad)

# convert numpy to torch
x = np.array([[1, 2], [2, 4]])

# use shared memory space, not copy
y = torch.from_numpy(x)

# copy data
y = torch.tensor(x)

print(y)
print(y.dtype)

# convert torch to numpy
z = y.numpy()
print(z)

PyTorch Tutorial 2

Simple linear regression with the autograd method in PyTorch:

  1. @ performs matrix multiplication (the inner/dot product)
  2. .t() returns the transposed matrix
  3. .numel() returns the number of elements in a tensor
  4. with torch.no_grad() means the code inside this block will not track gradients, which saves memory and computation time (a small demo of these four operations follows below)
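A small stand-alone demo of these four operations (the tensors here are arbitrary examples, not the tutorial's data):

import torch

a = torch.tensor([[1., 2., 3.],
                  [4., 5., 6.]])   # shape (2, 3)
w = torch.randn(4, 3)              # shape (4, 3)

print(a @ w.t())                   # matrix multiplication with the transpose: (2, 3) @ (3, 4) -> (2, 4)
print(a.t().shape)                 # torch.Size([3, 2])
print(a.numel())                   # 6 elements in total

x = torch.tensor(2., requires_grad=True)
with torch.no_grad():
    y = x * 3                      # computed without tracking gradients
print(y.requires_grad)             # False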
import torch
import numpy as np

inputs = np.array([[0, 0, 3],
                   [0, 1, 9],
                   [1, 0, 8],
                   [1, 1, 28]], dtype='float32')

outputs = np.array([[0, 1],
                    [9, 4],
                    [7, 3],
                    [6, 7]], dtype='float32')

inputs = torch.from_numpy(inputs)
outputs = torch.from_numpy(outputs)

w = torch.randn(2, 3, requires_grad=True)
b = torch.randn(2, requires_grad=True)

# print(b)

def model(x):
    # b is a vector; when it is added to the matrix, it is broadcast across the rows
    return x @ w.t() + b

def mse(t1, t2):
    return torch.sum((t1 - t2) ** 2) / t1.numel()

learning_rate = 1e-5
for t in range(500):
    y_pred = model(inputs)
    loss = mse(y_pred, outputs)
    loss.backward()
    with torch.no_grad():
        w -= learning_rate * w.grad
        b -= learning_rate * b.grad
        w.grad.zero_()
        b.grad.zero_()
print(loss.item())