PyTorch

PyTorch Tutorial

Python3中机器学习框架

dataset = MyDataset(file)
dataloader = DataLoader(dataset, batch_size = size , shuffle = True)
Training : True
Testing : False
from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    """Skeleton of a map-style dataset: load once, then serve samples by index."""

    def __init__(self, file):
        # Read data & preprocess. ``...`` is a placeholder to be replaced with
        # the real loading code for ``file``.
        self.data = ...

    def __getitem__(self, index):
        """Return one sample at a time."""
        return self.data[index]

    def __len__(self):
        """Return the size of the dataset."""
        return len(self.data)
dataset = MyDataset(file)
# The class is spelled ``DataLoader`` (capital L).
# shuffle=True is the training setting; pass shuffle=False for testing.
dataloader = DataLoader(dataset, batch_size, shuffle=True)
shuffle : Training -> True
		  Testing -> False

Tensors

High-dimensional matrices(arrays)

.shape # show the dimensions (an attribute, not a method; x.size() is the method form)
# Directly from data (list or numpy.ndarray).
# torch.tensor takes a single data argument, so the rows are nested in one list.
x = torch.tensor([[1, -1], [-1, 1]])
x = torch.from_numpy(np.array([[1, -1], [-1, 1]]))
# Tensors of constant zeros & ones; the argument is the shape.
x = torch.zeros([2, 2])
x = torch.ones([1, 2, 5])
x+y x-y y=x.pow(2) y=x.sum() y=x.mean()
# Transpose: swap two specified dimensions
x = x.transpose(dim0,dim1) # swap dimensions dim0 and dim1
# Squeeze: remove the specified dimension when its length is 1
x = x.squeeze(1)
# Unsqueeze: insert a new dimension of length 1 at the given position
x = x.unsqueeze(1)

dim in PyTorch == axis in NumPy

dimensional

Check with .shape

Creating Tensors

  • Directly from data (list or numpy.ndarray)

    # torch.tensor takes a single data argument: nest the rows in one list.
    x = torch.tensor([[1, -1], [-1, 1]])
    x = torch.from_numpy(np.array([[1, -1], [-1, 1]]))
    
  • Tensor of constant zeros & ones

x = torch.zeros([2,2])
x = torch.ones([1, 2, 5])
  • Common Operations

addition subtraction power summation mean

  • transpose
x.shape
x.transpose(0,1)

Unsqueeze : expand a new dimension

x = x.unsqueeze(1)

**Cat**: concatenate multiple tensors 合并多个矩阵

torch.cat([x, y, z], dim = 1)

Data Type: Using different data types for model and data will cause errors.

32-bit floating point - torch.float

64-bit integer (signed) - torch.long

Device

  • Tensors & modules will be computed with CPU by default
  • Use .to() to move tensors to appropriate devices
    • CPU
      • x = x.to('cpu')
    • GPU
      • x = x.to('cuda')
  • GPU
    • check if your computer has NVIDIA GPU
      • torch.cuda.is_available()
    • Multiple GPUs : specify 'cuda:0', 'cuda:1', 'cuda:2',...

Gradient Calculation

import torch
# Create a tensor x that tracks gradients (requires_grad=True).
x = torch.tensor([[1., 0.], [-1., 1.]], requires_grad=True)
# Square x element-wise, then sum every entry into the scalar z.
squared = x.pow(2)
z = squared.sum()
# Backpropagate from z; autograd fills in the gradient of x.
z.backward()
# Show the gradient of z with respect to x.
print(x.grad)

torch.nn

Network Layers

  • Linear Layer (Fully-connected Layer)
    • nn.Linear(in_features, out_features)

Non-linear Activation Functions

  • nn.Sigmoid()
  • nn.ReLU()

Build your own neural network

import torch.nn as nn
class MyModel(nn.Module):
    """Small fully-connected network: Linear(10, 32) -> Sigmoid -> Linear(32, 1)."""

    def __init__(self):
        # Initialize the model and define its layers.
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(10, 32),
            nn.Sigmoid(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        """Compute the output of the network.

        Args:
            x: input tensor whose last dimension is 10, the ``in_features``
                of the first layer.

        Returns:
            The result of passing ``x`` through ``self.net``; its last
            dimension is 1.
        """
        return self.net(x)

Loss Functions

  • Mean squared Error (for regression tasks)
criterion = nn.MSELoss()
  • Cross Entropy (for classification tasks) 交叉熵
criterion = nn.CrossEntropyLoss()
  • loss = criterion(model_output, expected_value)

torch.optim

  • Stochastic Gradient Descent (SGD)
    • torch.optim.SGD(model.parameters(), lr, momentum = 0)
  • For every batch of data
    • Call optimizer.zero_grad() to reset gradients of model parameters.
    • Call loss.backward() to backpropagate gradients of prediction loss
    • Call optimizer.step() to adjust model parameters

Neural Network Training Setup

# Read the data through the dataset class (named ``MyDataset``).
dataset = MyDataset(file)
# Batches of 16 samples, reshuffled each epoch for training.
tr_set = DataLoader(dataset, 16, shuffle=True)
# Build the model and move it to the target device.
model = MyModel().to(device)
# Mean squared error loss.
criterion = nn.MSELoss()
# SGD over all model parameters with learning rate 0.1.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

Training Loop

for epoch in range(n_epochs):  # one pass over the training set per epoch
    model.train()  # switch the model to training mode
    for x, y in tr_set:
        # Put the batch on the same device as the model (e.g., GPU).
        x = x.to(device)
        y = y.to(device)
        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()
        # Forward pass: predictions, then the loss against the targets.
        pred = model(x)
        loss = criterion(pred, y)
        # Backward pass fills in the gradients; the optimizer then applies them.
        loss.backward()
        optimizer.step()

Validation Loop

model.eval()  # Set the model to evaluation mode
total_loss = 0
total_samples = 0

for x, y in dv_set:  # Iterate over the validation set
    x, y = x.to(device), y.to(device)  # Move data to the device
    with torch.no_grad():  # Disable gradient computation
        pred = model(x)  # Forward pass, compute predictions
        loss = criterion(pred, y)  # Compute the loss
    # Weight the batch loss by the batch size so batches of different sizes
    # contribute proportionally (assumes criterion uses mean reduction).
    total_loss += loss.item() * len(x)
    total_samples += len(x)
# Divide by the number of samples seen; len(dv_set) would be the number of
# batches and would overstate the per-sample average.
avg_loss = total_loss / total_samples

Testing Loop

model.eval()  # evaluation mode for inference
preds = []

for x in tt_set:  # each test batch is yielded as inputs only
    x = x.to(device)  # same device as the model
    with torch.no_grad():  # no gradients are needed at test time
        pred = model(x)
    # Collect each batch of predictions on the CPU.
    preds.append(pred.cpu())

Data, demo1

Load data :

use pandas to load a csv file

# Load the CSV with pandas, drop the 'date' column, and keep the raw array.
train_data = pd.read_csv('./name.csv').drop(columns=['date']).values
# Features are every column except the last; the label is the last column
# (assumes the target is stored last - confirm against the CSV layout).
x_train, y_train = train_data[:, :-1], train_data[:, -1]

Dataset

init : Read data and preprocess

getitem : Return one sample at a time, In this case, one sample includes a 117 dimensional feature and a label

len : Return the size of the dataset. In this case, it is 2699

class COVID19Dataset(Dataset):
    """Dataset holding a feature matrix and, optionally, its targets.

    x: np.ndarray  feature matrix.
    y: np.ndarray  target labels; if None, this is a prediction dataset.
    """

    def __init__(self, x, y=None):
        # Targets stay None for prediction data; otherwise store float tensors.
        self.y = None if y is None else torch.FloatTensor(y)
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        # With targets: (features, label). Without: features only.
        features = self.x[idx]
        if self.y is not None:
            return features, self.y[idx]
        return features

    def __len__(self):
        # The dataset size is the number of rows in the feature tensor.
        return len(self.x)

Dataloader

train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True, pin_memory = True)

Model

class My_Model(nn.Module):
    """MLP regressor: input_dim -> 16 -> 8 -> 1 with ReLU between the layers."""

    def __init__(self, input_dim):
        super().__init__()
        # TODO: modify the model structure; mind the matrix dimensions.
        stack = [
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        # Run the layer stack, then drop the trailing axis: (B, 1) -> (B)
        return self.layers(x).squeeze(1)

Criterion

criterion = torch.nn.MSELoss(reduction = 'mean')

Optimizer

optimizer = torch.optim.SGD(model.parameters(), lr = 1e-5, momentum = 0.9)

Training Loop

Documentation and Common Errors

read pytorch tutorial