

pytorch coding笔记
pytorch的基础使用,包括模型创建、数据加载、训练与评估等内容。
views
| comments
1.入门#
1.1 数据加载到模型训练与评估example#
1.1.1 模型创建#
一般来说,创建一个模型,首先继承nn.Module类,然后在__init__方法中定义网络层,在forward方法中定义前向传播逻辑。
# model.py
import torch
import torch.nn as nn
class SimpleNN(nn.Module):
def __init__(self):
super(SimpleNN, self).__init__()
self.fc1 = nn.Linear(28 * 28, 128)
self.fc2 = nn.Linear(128, 10)
def forward(self, x):
x = torch.flatten(x, 1)
x = torch.relu(self.fc1(x))
x = self.fc2(x)
return xpython1.1.2 数据加载#
使用Dataset来定义自定义数据集,并使用DataLoader来加载数据,其中的transform用于对图像进行预处理。总的来说是DataLoader封装了Dataset,提供了批量加载、打乱数据等功能,同时Dataset负责数据的读取并利用transform进行预处理。
# data_loader.py
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
class MyCustomDataset(Dataset):
def __init__(self, image_dir, labels_file, transform=None):
self.image_dir = image_dir
self.transform = transform
# 假设标签文件格式:image_name label
with open(labels_file, 'r') as f:
self.labels = [line.strip().split() for line in f.readlines()]
def __len__(self):
return len(self.labels)
def __getitem__(self, idx):
img_name, label = self.labels[idx]
img_path = os.path.join(self.image_dir, img_name)
image = Image.open(img_path).convert("RGB")
label = int(label)
if self.transform:
image = self.transform(image)
return image, label
def get_data_loaders(batch_size=64):
transform = transforms.Compose([
transforms.Resize((28, 28)),
transforms.Grayscale(num_output_channels=1),
transforms.ToTensor(),
transforms.Normalize((0.5,), (0.5,))
])
# 训练数据集加载
train_dataset = MyCustomDataset(
image_dir='./data/train',
labels_file='./data/train_labels.txt',
transform=transform
)
# 测试数据集加载
test_dataset = MyCustomDataset(
image_dir='./data/test',
labels_file='./data/test_labels.txt',
transform=transform
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
return train_loader, test_loaderpython1.1.3 训练#
训练过程包括前向传播、计算损失、反向传播和优化模型参数,最后保存训练好的模型。超参数如学习率、批量大小、训练轮数等可以通过命令行参数进行配置,方便调整。
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
import argparse
from model import SimpleNN
from data_loader import get_data_loaders
def train(args):
# 配置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 获取数据加载器,使用命令行参数 batch_size
train_loader, _ = get_data_loaders(batch_size=args.batch_size)
model = SimpleNN().to(device)
criterion = nn.CrossEntropyLoss()
# 使用命令行参数 lr
optimizer = optim.AdamW(model.parameters(), lr=args.lr)
model.train()
# 使用命令行参数 epochs
for epoch in range(args.epochs):
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
optimizer.zero_grad()
output = model(data)
loss = criterion(output, target)
loss.backward()
optimizer.step()
if batch_idx % 100 == 0:
print(f"Epoch {epoch + 1}/{args.epochs}, Batch {batch_idx}, Loss: {loss.item():.4f}")
# 使用命令行参数 save_path
torch.save(model.state_dict(), args.save_path)
print(f"Model saved to `{args.save_path}`")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="PyTorch MNIST Training")
# 定义超参数
parser.add_argument("--batch-size", type=int, default=64, help="input batch size for training (default: 64)")
parser.add_argument("--epochs", type=int, default=2, help="number of epochs to train (default: 2)")
parser.add_argument("--lr", type=float, default=0.001, help="learning rate (default: 0.001)")
parser.add_argument("--save-path", type=str, default="simple_nn.pth",
help="path to save the model (default: simple_nn.pth)")
args = parser.parse_args()
train(args)python1.1.4 评估#
评估模型的性能,计算在测试集上的准确率,并加载之前保存的模型权重进行评估,此时不需要计算梯度,因此使用torch.no_grad()来节省内存和计算资源。
# evaluate.py
import torch
from model import SimpleNN
from data_loader import get_data_loaders
def evaluate():
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
_, test_loader = get_data_loaders()
model = SimpleNN().to(device)
try:
model.load_state_dict(torch.load("simple_nn.pth", weights_only=True))
model.eval()
except FileNotFoundError:
print("Error: `simple_nn.pth` not found. Please run `train.py` first.")
return
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
pred = output.argmax(dim=1, keepdim=True)
correct += pred.eq(target.view_as(pred)).sum().item()
print(f"Test Accuracy: {100. * correct / len(test_loader.dataset):.2f}%")
if __name__ == "__main__":
evaluate()python1.2 自定义层#
在__init__方法中定义层,pytorch中提供了很多常用的层,如nn.Linear、nn.Conv2d、nn.ReLU等。
常用的层包括:
# 1. 线性层 (Fully Connected Layer)
# in_features: 输入神经元个数, out_features: 输出神经元个数
fc = nn.Linear(in_features=784, out_features=128)
# 2. 卷积层 (Convolutional Layer)
# 参数: 输入通道数, 输出通道数, 卷积核尺寸, 步长, 填充
conv = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
# 3. 池化层 (Pooling Layer)
max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)
# 4. 激活函数 (Activation Functions)
relu = nn.ReLU()
sigmoid = nn.Sigmoid()
tanh = nn.Tanh()
# 5. 批归一化层 (Batch Normalization)
bn1d = nn.BatchNorm1d(num_features=128)
bn2d = nn.BatchNorm2d(num_features=16)
# 6. 丢弃层 (Dropout Layer)
dropout = nn.Dropout(p=0.5)
# 7. 循环神经网络层 (RNN/LSTM/GRU)
rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2)
lstm = nn.LSTM(input_size=10, hidden_size=20, num_layers=2)
gru = nn.GRU(input_size=10, hidden_size=20, num_layers=2)
# 8. Transformer 与 注意力机制
transformer = nn.Transformer(d_model=512, nhead=8)
attention = nn.MultiheadAttention(embed_dim=512, num_heads=8)
# 9. Sequential 容器: 用于按顺序包装多个层
model = nn.Sequential(
nn.Linear(784, 128),
nn.ReLU(),
nn.Linear(128, 10)
)python1.3 Dataset与DataLoader#
Dataset是一个抽象类,在实现自己的数据集类的时候需要继承它并实现__init__、__len__和__getitem__方法(这里可以看最开始的例子)
class CustomImageDataset(Dataset):
def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
self.img_labels = pd.read_csv(annotations_file)
self.img_dir = img_dir
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.img_labels)
def __getitem__(self, idx):
img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
image = read_image(img_path)
label = self.img_labels.iloc[idx, 1]
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return image, labelpythonDataLoader是一个迭代器,用于批量加载数据集,并支持多线程加载、数据打乱等功能,其中有很多参数
train_loader = DataLoader(
dataset=train_dataset, # 数据集对象
batch_size=64, # 每个批次的样本数量
shuffle=True, # 是否在每个epoch开始时打乱数据
num_workers=4, # 用于数据加载的子进程数量
pin_memory=True # 是否将数据加载到CUDA固定内存中
)python1.4 损失函数与优化器#
1.4.1 损失函数#
损失函数用于衡量模型预测值与真实值之间的差距,常用的损失函数有:
# 1. 均方误差损失 (Mean Squared Error Loss)
mse_loss = nn.MSELoss()
# 2. 交叉熵损失 (Cross Entropy Loss)
cross_entropy_loss = nn.CrossEntropyLoss()
# 3. 二元交叉熵损失 (Binary Cross Entropy Loss)
bce_loss = nn.BCELoss()
# 4. 平滑L1损失 (Smooth L1 Loss)
smooth_l1_loss = nn.SmoothL1Loss()
# 5. KL散度损失 (Kullback-Leibler Divergence Loss)
kl_div_loss = nn.KLDivLoss()python1.4.2 优化器#
优化器用于更新模型参数以最小化损失函数,常用的优化器有:
# 1. Stochastic gradient descent (here with momentum)
sgd_optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# 2. Adam optimizer
adam_optimizer = optim.Adam(model.parameters(), lr=0.001)
# 3. RMSprop optimizer
rmsprop_optimizer = optim.RMSprop(model.parameters(), lr=0.001)
# 4. AdamW optimizer (Adam with decoupled weight decay)
adamw_optimizer = optim.AdamW(model.parameters(), lr=0.001)