# Dataset
Import: `from torch.utils.data import Dataset`. A custom dataset must subclass `Dataset` and implement the `__len__` and `__getitem__` methods.
Example: building a simple image dataset
```python
from torch.utils.data import Dataset
from PIL import Image
import os

class MyData(Dataset):
    def __init__(self, root_dir, label_dir):
        # root_dir: root directory of the dataset
        self.root_dir = root_dir
        # label_dir: subdirectory, whose name doubles as the label
        self.label_dir = label_dir
        # collect all file names in the subdirectory
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)

    # override __getitem__: return the image and label for a given index
    def __getitem__(self, idx):
        img_name = self.img_path[idx]
        # img_item_path: full path to the image file
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir
        return img, label

    # override __len__: return the size of the dataset
    def __len__(self):
        return len(self.img_path)

root_dir = "dataset/train"
ants_label_dir = "ants"
bees_label_dir = "bees"
ants_dataset = MyData(root_dir, ants_label_dir)
bees_dataset = MyData(root_dir, bees_label_dir)
# concatenate the two datasets
train_dataset = ants_dataset + bees_dataset
# display the first image in the combined dataset
img, label = train_dataset[0]
img.show()
```
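The `+` above is shorthand for `torch.utils.data.ConcatDataset`, which chains the two datasets end to end. A minimal sanity-check sketch:
```python
from torch.utils.data import ConcatDataset

# Dataset.__add__ returns a ConcatDataset, so this is equivalent to the + above
train_dataset = ConcatDataset([ants_dataset, bees_dataset])
# the combined length is the sum of the parts
assert len(train_dataset) == len(ants_dataset) + len(bees_dataset)
# indexing walks through ants_dataset first, then bees_dataset
img, label = train_dataset[len(ants_dataset)]  # first bee image
print(label)  # "bees"
```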
# TensorBoard Usage
TensorBoard docs: https://pytorch.org/docs/stable/tensorboard.html
TensorBoard is a visualization tool for tracking metrics during training, such as the loss and accuracy.
# Installation
```bash
pip install tensorboard
```
# Importing the Module
`SummaryWriter` is the core TensorBoard class, used to log metrics during training.
```python
from torch.utils.tensorboard import SummaryWriter
```
# Usage
Plotting curves
The `add_scalar` function logs scalar data such as the loss or accuracy.
Data format: `torch.Tensor` or `float`
```python
from torch.utils.tensorboard import SummaryWriter

# create a SummaryWriter object
writer = SummaryWriter("logs")
# log data during training
for i in range(100):
    writer.add_scalar("Loss/train", i, i)  # tag, y-axis value, x-axis step
# close the SummaryWriter object
writer.close()
```
Plotting images
The `add_image` function logs image data, such as input images seen during training.
Data format: `torch.Tensor`, `numpy.ndarray`, or string/blobname
```python
from torch.utils.tensorboard import SummaryWriter
import numpy as np

# create a SummaryWriter object
writer = SummaryWriter("logs")
# log data during training
for i in range(100):
    writer.add_image("Image", np.random.rand(3, 100, 100), i)  # tag, image data (C, H, W), step
# close the SummaryWriter object
writer.close()
```
# Launching
```bash
tensorboard --logdir=logs
```
# Access
Open a browser and go to http://localhost:6006 to view TensorBoard.
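If port 6006 is already taken, a different port can be specified:
```bash
tensorboard --logdir=logs --port=6007
```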
# Transforms
# Importing the Module
from torchvision import transforms
# ToTensor()
`transforms.ToTensor()` converts a PIL Image or `numpy.ndarray` into a `torch.FloatTensor` with shape (C, H, W), values in [0, 1.0], and float dtype.
Example:
```python
from torchvision import transforms
from PIL import Image

img = Image.open('test.jpg')
tensor_img = transforms.ToTensor()(img)
# or equivalently
tensor_trans = transforms.ToTensor()  # create the transform
tensor_img = tensor_trans(img)
```
# Normalize()
`transforms.Normalize(mean, std)` standardizes an image: each channel is shifted by its mean and divided by its standard deviation; output dtype is float.
Example:
```python
from torchvision import transforms
from PIL import Image

img = Image.open('test.jpg')
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)
# normalize each of the 3 channels with mean 0.5 and std 0.5 (illustrative values)
norm_trans = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
norm_img = norm_trans(tensor_img)
```
# Resize()
`transforms.Resize(size)` rescales an image. `size` can be an int or a tuple: an int scales the shorter side to that size with the longer side scaled proportionally; a tuple sets the output height and width directly.
Example:
```python
from torchvision import transforms
from PIL import Image

img = Image.open('test.jpg')
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)
resize_trans = transforms.Resize((200, 200))
resize_img = resize_trans(tensor_img)
```
# Compose()
`transforms.Compose([transform1, transform2, ...])` chains multiple transforms, applied in list order.
Example:
```python
from torchvision import transforms
from PIL import Image

img = Image.open('test.jpg')
tensor_trans = transforms.ToTensor()
resize_trans = transforms.Resize((200, 200))
# first resize to 200x200, then convert to a tensor (applied in list order)
compose_trans = transforms.Compose([resize_trans, tensor_trans])
compose_img = compose_trans(img)
```
# RandomCrop()
`transforms.RandomCrop(size)` randomly crops an image. `size` can be an int or a tuple: an int crops a size×size patch; a tuple crops a size[0]×size[1] patch.
Example:
```python
from torchvision import transforms
from PIL import Image

img = Image.open('test.jpg')
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)
crop_trans = transforms.RandomCrop((200, 200))
crop_img = crop_trans(tensor_img)
```
# Using datasets
`datasets` is torchvision's dataset module; it makes common datasets such as MNIST, CIFAR-10, and ImageNet easy to load.
# Importing the Module
from torchvision import datasets
Usage: datasets.DatasetName(root, train, transform, download)
- root: root directory of the dataset
- train: whether to load the training split, default True
- transform: transform applied to each image
- download: whether to download the dataset, default False
# Downloading a Dataset
```python
import torch
from torchvision import datasets, transforms

# load the MNIST dataset
train_dataset = datasets.MNIST(root='data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='data', train=False, download=True, transform=transforms.ToTensor())
# create data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)
# iterate over the dataset
for images, labels in train_loader:
    print(images.shape, labels.shape)
```
# Using DataLoader
`DataLoader` is PyTorch's data-loading utility; it wraps a dataset and provides batching, shuffling, and multi-process loading.
# Importing the Module
from torch.utils.data import DataLoader
Usage: DataLoader(dataset, batch_size, shuffle, num_workers)
- dataset: the dataset object
- batch_size: number of samples per batch
- shuffle: whether to shuffle the dataset, default False
- num_workers: number of worker processes for loading
# Loading a Dataset
```python
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# load the MNIST dataset
train_dataset = datasets.MNIST(root='data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='data', train=False, download=True, transform=transforms.ToTensor())
# create data loaders
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)
# iterate over the dataset
for images, labels in train_loader:
    print(images.shape, labels.shape)
```
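To peek at a single batch without writing a full loop, a small sketch using the loader above:
```python
# grab one batch from the loader
images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([64, 1, 28, 28]) for MNIST
print(labels.shape)  # torch.Size([64])
```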
# Neural Networks
# Importing the Module
from torch import nn
# Convolutional Layers
The difference between `torch.nn.Conv2d()` and `torch.nn.functional.conv2d()`:
`torch.nn.Conv2d()` is the convolution layer in the `nn` module; you configure the kernel size, stride, padding, and so on, it manages its own weight tensor, and gradients are computed automatically. `torch.nn.functional.conv2d()` is the functional form in `nn.functional`; the kernel (weight tensor) must be supplied manually.
torch.nn.functional.conv2d()
torch.nn.functional.conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1)
Parameters:
- input: input tensor of shape (N, C_in, H_in, W_in)
- weight: kernel tensor of shape (C_out, C_in // groups, kH, kW)
- bias: bias tensor of shape (C_out)
- stride: stride, default 1
- padding: padding, default 0
- dilation: dilation, default 1
- groups: number of groups for grouped convolution, default 1
Example:
```python
import torch
import torch.nn.functional as F

# input data
input = torch.tensor([
    [1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 2, 1, 0, 0],
    [0, 3, 1, 1, 1],
    [0, 1, 0, 0, 0]
], dtype=torch.float32)  # conv2d expects a floating-point tensor
# define the kernel
kernel = torch.tensor([
    [1, 0, 1],
    [0, 1, 0],
    [1, 0, 1]
], dtype=torch.float32)
# reshape input and kernel into 4-D tensors
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))
# convolution
output = F.conv2d(input, kernel, stride=1)
print(output)
```
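For reference, the spatial size of a convolution's output follows (the same formula applies to `nn.Conv2d`):

$$H_{out} = \left\lfloor \frac{H_{in} + 2 \times \text{padding} - \text{dilation} \times (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1 \right\rfloor$$

For the example above: $\lfloor (5 + 0 - 1 \times 2 - 1)/1 + 1 \rfloor = 3$, so the output is a 3×3 feature map.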
torch.nn.Conv2d()
torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')
Parameters:
- in_channels: number of input channels
- out_channels: number of output channels
- kernel_size: kernel size, int or tuple
- stride: stride, default 1
- padding: padding, default 0
- dilation: dilation, default 1
- groups: number of groups for grouped convolution, default 1
- bias: whether to use a bias, default True
- padding_mode: padding mode, default 'zeros'
Example:
```python
import torch
import torch.nn as nn

# input data
input = torch.tensor([
    [1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 2, 1, 0, 0],
    [0, 3, 1, 1, 1],
    [0, 1, 0, 0, 0]
], dtype=torch.float32)  # the layer's weights are float, so the input must be too
# define the convolution layer
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, stride=1, padding=1)
# reshape the input into a 4-D tensor
input = torch.reshape(input, (1, 1, 5, 5))
# convolution
output = conv(input)
print(output)
```
Building a simple model:
```python
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# prepare the dataset
dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
# create the data loader
dataloader = DataLoader(dataset, batch_size=64)

# define the network
class Net(nn.Module):
    def __init__(self):  # initialization
        super(Net, self).__init__()  # call the parent constructor
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, input):
        x = self.conv1(input)
        return x

# TensorBoard visualization
writer = SummaryWriter("../logs")
# instantiate the network
net = Net()
step = 0
# iterate over the dataset
for data in dataloader:
    # unpack images and labels
    imgs, target = data
    # forward pass
    output = net(imgs)
    # log the input to TensorBoard
    writer.add_images("input", imgs, step)
    # add_images expects 3 channels, so fold the 6-channel output into extra
    # batch entries; 32 - 3 + 1 = 30, hence the 30x30 feature maps
    output = torch.reshape(output, (-1, 3, 30, 30))
    # log the output to TensorBoard
    writer.add_images("output", output, step)
    step = step + 1
writer.close()
```
# Pooling Layers
torch.nn.MaxPool2d()
torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
Parameters:
- kernel_size: pooling window size, int or tuple
- stride: stride, defaults to kernel_size
- padding: padding, default 0
- dilation: dilation, default 1
- return_indices: whether to return the max indices, default False
- ceil_mode: whether to round the output size up instead of down, default False
Example 1:
```python
import torch
from torch import nn
from torch.nn import MaxPool2d

# define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, x):
        x = self.maxpool1(x)
        return x

input = torch.tensor([
    [1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 0, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1]], dtype=torch.float32)  # max pooling requires a float tensor
print(input.shape)
input = torch.reshape(input, (-1, 1, 5, 5))  # reshape to a 4-D tensor (batch_size, channels, height, width)
print(input.shape)
# instantiate the network
net = Net()
# forward pass
output = net(input)
# print the output
print(output)
```
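Worked shape check: with a 5×5 input, kernel_size=3, and stride defaulting to kernel_size (3), the output side length is ⌊(5−3)/3⌋ + 1 = 1 when ceil_mode=False, but ⌈(5−3)/3⌉ + 1 = 2 when ceil_mode=True, because ceil_mode keeps the partially covered window at the border.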
**Example 2:** using an image dataset
```python
import torchvision
from torch import nn
from torch.nn import MaxPool2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=False)

    def forward(self, x):
        x = self.maxpool1(x)
        return x

# prepare the dataset
dataset = torchvision.datasets.CIFAR10("../dataset",
                                       train=False,
                                       download=True,
                                       transform=torchvision.transforms.ToTensor())
# load the dataset
dataloader = DataLoader(dataset, batch_size=64)
# TensorBoard visualization
writer = SummaryWriter("../logs_maxpool")
# instantiate the network model
tudui = Tudui()
# feed the data through
step = 0
for data in dataloader:
    img, target = data
    writer.add_images("input", img, step)
    # forward pass; unlike Example 1, no reshape is needed because
    # DataLoader already yields 4-D tensors
    output = tudui(img)
    writer.add_images("output", output, step)
    step = step + 1
writer.close()
```
# Activation Functions
torch.nn.ReLU()
torch.nn.ReLU(inplace=False)
Parameters:
- inplace: whether to modify the input in place, default False
Example:
```python
import torch
from torch import nn
from torch.nn import ReLU

# define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.relu1 = ReLU()

    def forward(self, x):
        x = self.relu1(x)
        return x

input = torch.tensor([
    [1, -0.5],
    [1.5, 1]])
print(input.shape)
input = torch.reshape(input, (-1, 1, 2, 2))  # reshape to a 4-D tensor (batch_size, channels, height, width)
print(input.shape)
# instantiate the network
net = Net()
# forward pass
output = net(input)
# print the output
print(output)
```
Example 2: using an image dataset
```python
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.relu1 = ReLU()
        self.sigmoid1 = Sigmoid()

    def forward(self, x):
        # x = self.relu1(x)
        x = self.sigmoid1(x)
        return x

# prepare the dataset
dataset = torchvision.datasets.CIFAR10("../dataset",
                                       train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
# load the data
dataloader = DataLoader(dataset, batch_size=64)
# TensorBoard visualization
writer = SummaryWriter("../logs_relu")
# instantiate the network
net = Net()
step = 0
for data in dataloader:
    img, target = data
    writer.add_images("img", img, step)
    # forward pass
    output = net(img)
    writer.add_images("output", output, step)
    step = step + 1
writer.close()
```
# Normalization Layers
Overview: the distribution of each layer's inputs can shift during training, which slows convergence. A normalization layer keeps the input distributions of successive layers similar, speeding up convergence.
torch.nn.BatchNorm2d()
torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Parameters:
- num_features: number of input channels
- eps: small constant to avoid division by zero
- momentum: momentum for the running statistics
- affine: whether to learn a scale and shift
- track_running_stats: whether to track running statistics
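For reference, `BatchNorm2d` normalizes each channel over the batch and then applies a learned affine transform:

$$y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} \cdot \gamma + \beta$$

where $\gamma$ and $\beta$ are the scale and shift learned when `affine=True`.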
Example:
```python
import torch
from torch import nn
from torch.nn import BatchNorm2d

# define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # num_features must match the number of input channels (2 here)
        self.batchnorm1 = BatchNorm2d(2)

    def forward(self, x):
        x = self.batchnorm1(x)
        return x

input = torch.tensor([
    [[1, 2, 0, 3, 1],
     [0, 1, 2, 3, 1],
     [1, 0, 1, 0, 0],
     [5, 2, 3, 1, 1],
     [2, 1, 0, 1, 1]],
    [[1, 2, 0, 3, 1],
     [0, 1, 2, 3, 1],
     [1, 0, 1, 0, 0],
     [5, 2, 3, 1, 1],
     [2, 1, 0, 1, 1]]], dtype=torch.float32)  # BatchNorm requires a float tensor
print(input.shape)
input = torch.reshape(input, (-1, 2, 5, 5))  # reshape to a 4-D tensor (batch_size, channels, height, width)
print(input.shape)
# instantiate the network
net = Net()
# forward pass
output = net(input)
# print the output
print(output)
```
# Linear Layers
Overview: a linear (fully connected) layer maps input features to output features using a weight matrix and a bias vector: output = input · weightᵀ + bias
torch.nn.Linear()
torch.nn.Linear(in_features, out_features, bias=True)
Parameters:
- in_features: number of input features
- out_features: number of output features
- bias: whether to use a bias
Example:
```python
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

# define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 196608 = 64 * 3 * 32 * 32, the size of one flattened batch
        self.linear1 = Linear(196608, 10)

    def forward(self, x):
        x = self.linear1(x)
        return x

# prepare the dataset
dataset = torchvision.datasets.CIFAR10("../dataset",
                                       train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
# load the data; drop_last=True discards the final batch if it is smaller than
# batch_size, which would otherwise break the fixed in_features above
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)
# instantiate the network
net = Net()
for data in dataloader:
    img, target = data
    print(img.shape)
    # flatten the whole batch into a 1-D tensor so it matches in_features
    # (Linear accepts any shape of the form (..., in_features))
    output = torch.flatten(img)
    # forward pass
    output = net(output)
    print(output.shape)
```
# Sequential
Overview: Sequential is a PyTorch container that combines multiple layers into one network; the layers run in the order they were added.
torch.nn.Sequential()
torch.nn.Sequential(*args)
Parameters:
- *args: the layers to add, in order
Example:
```python
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter

# define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        return self.model1(x)

net = Net()
print(net)
# prepare dummy input
input = torch.ones((64, 3, 32, 32))
# forward pass
output = net(input)
print(output.shape)
# TensorBoard visualization
writer = SummaryWriter("../log_seq")
# add the network graph
writer.add_graph(net, input)
writer.close()
```
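Why `Linear(1024, 64)`: each `Conv2d(..., 5, padding=2)` preserves the 32×32 spatial size, and each `MaxPool2d(2)` halves it, so the feature map shrinks 32 → 16 → 8 → 4. After the last pooling the tensor is 64 channels × 4 × 4, and `Flatten()` turns that into 64 × 4 × 4 = 1024 features.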
# Loss Functions
Overview: a loss function measures the gap between the network's predictions and the ground truth; the smaller the loss, the closer the predictions are to the targets.
- torch.nn.L1Loss()
torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')
Parameters:
- size_average: (deprecated) whether to average the loss
- reduce: (deprecated) whether to sum the loss
- reduction: reduction mode, one of 'mean', 'sum', 'none'
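With `reduction='none'` the element-wise loss is $\ell_i = |x_i - y_i|$; `'mean'` and `'sum'` then average or sum those values.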
Example:
```python
import torch
from torch import nn
from torch.nn import L1Loss

inputs = torch.tensor([1, 2, 3], dtype=torch.float32)
target = torch.tensor([1, 2, 5], dtype=torch.float32)
inputs = torch.reshape(inputs, (1, 1, 1, 3))
target = torch.reshape(target, (1, 1, 1, 3))
loss = L1Loss(reduction="sum")
result = loss(inputs, target)
loss_mse = nn.MSELoss()
results_mse = loss_mse(inputs, target)
print(result)       # tensor(2.): |1-1| + |2-2| + |3-5|
print(results_mse)  # tensor(1.3333): (0 + 0 + 4) / 3
```
- torch.nn.CrossEntropyLoss()
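For a logit vector $x$ and a target class index $y$, the loss is the negative log-softmax of the target logit:

$$\text{loss}(x, y) = -\log\frac{\exp(x_y)}{\sum_j \exp(x_j)} = -x_y + \log\sum_j \exp(x_j)$$

so the input should be raw, unnormalized scores.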
Example:
```python
import torch
from torch import nn

x = torch.tensor([0.1, 0.2, 0.3])
y = torch.tensor([1])
x = torch.reshape(x, (1, 3))
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)
```
# Backpropagation
Overview: backpropagation computes the gradients of the loss with respect to the network parameters. Using the chain rule, it works backward from the output layer, computing each layer's gradients in turn until all parameter gradients are known.
Example:
```python
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset",
                                       train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        return self.model1(x)

net = Net()
loss_Cross = nn.CrossEntropyLoss()
for data in dataloader:
    imgs, target = data
    output = net(imgs)
    result_loss = loss_Cross(output, target)
    # backward pass: compute the gradients
    result_loss.backward()
```
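After `backward()` runs, each parameter holds its gradient in the `.grad` attribute; a quick sketch continuing from the loop above:
```python
# inspect the gradient of the first convolution layer's weights
print(net.model1[0].weight.grad.shape)  # torch.Size([32, 3, 5, 5])
```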
# Optimizers
Overview: an optimizer updates the network parameters. Given the gradients of the loss with respect to the parameters, it adjusts each parameter according to the gradient's magnitude and direction so as to minimize the loss.
torch.optim.SGD()
Example:
```python
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.data import DataLoader

# prepare the dataset
dataset = torchvision.datasets.CIFAR10("../dataset",
                                       train=False,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
# load it
dataloader = DataLoader(dataset, batch_size=64)

# define the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        return self.model1(x)

net = Net()
# define the loss function
loss_Cross = nn.CrossEntropyLoss()
# define the optimizer, passing in the network parameters and the learning rate
optim = torch.optim.SGD(net.parameters(), lr=0.01)
for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, target = data
        # forward pass
        output = net(imgs)
        # compute the loss
        result_loss = loss_Cross(output, target)
        # clear the gradients from the previous step
        optim.zero_grad()
        result_loss.backward()
        # update the parameters
        optim.step()
        # accumulate the epoch loss (.item() avoids keeping the graph alive)
        running_loss = running_loss + result_loss.item()
    print(running_loss)
```
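Other optimizers share the same interface; for example, swapping SGD for Adam changes only one line (the learning rate below is illustrative):
```python
optim = torch.optim.Adam(net.parameters(), lr=1e-3)
```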
# Transfer Learning
Overview: transfer learning reuses a model trained on one task to learn a new task; it can shorten training and improve generalization.
Example:
```python
import torchvision
from torch import nn

vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)
print(vgg16_true)
# append a layer at the end of the whole model
vgg16_true.add_module("add_linear", nn.Linear(1000, 10))
# or append a layer at the end of the classifier module
vgg16_true.classifier.add_module("add_linear", nn.Linear(1000, 10))
# or replace the 7th layer (index 6) of the classifier module
vgg16_false.classifier[6] = nn.Linear(4096, 10)
```
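A common follow-up in transfer learning is to freeze the pretrained feature extractor so that only the newly added layers are trained; a sketch using `vgg16_true` from above:
```python
# freeze the convolutional backbone; only the classifier head will update
for param in vgg16_true.features.parameters():
    param.requires_grad = False
```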
# Saving and Loading Models
Overview: saving and loading let you persist a trained model to disk and restore it later for further use.
- torch.save()
Example: saving a model
```python
import torch
import torchvision

vgg16 = torchvision.models.vgg16(pretrained=False)
# method 1: save the model structure + parameters
torch.save(vgg16, "vgg16_method1.pth")
# method 2: save only the parameters (officially recommended)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")
```
- torch.load()
Example: loading a model
```python
import torch
import torchvision

# loading a model saved with method 1
model = torch.load("vgg16_method1.pth")
print(model)
# --------------------------------------------------------------- #
# loading a model saved with method 2:
# define the model structure first
model = torchvision.models.vgg16(pretrained=False)
# then load the parameters
model.load_state_dict(torch.load("vgg16_method2.pth"))
print(model)
```
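Note: recent PyTorch releases default `torch.load` to `weights_only=True` for safety, so loading a full pickled model (method 1) may require passing the flag explicitly:
```python
model = torch.load("vgg16_method1.pth", weights_only=False)
```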
# A Complete Training Example
The project layout:
```
├─dataset
├─logs_train
├─model.py
└─train.py
```
Contents of model.py:
```python
# build the neural network
import torch
from torch import nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64*4*4, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        return self.model(x)

if __name__ == '__main__':
    # quick shape check
    net = Net()
    inputs = torch.ones(64, 3, 32, 32)
    output = net(inputs)
    print(output.shape)
```
Contents of train.py:
```python
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from model import *

# prepare the datasets
train_data = torchvision.datasets.CIFAR10("../dataset",
                                          train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10("../dataset",
                                         train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)
# dataset lengths
train_data_size = len(train_data)
test_data_size = len(test_data)
print("Training set size: {}".format(train_data_size))
print("Test set size: {}".format(test_data_size))
# load the datasets
train_dataload = DataLoader(train_data, batch_size=32)
test_dataload = DataLoader(test_data, batch_size=32)
# create the network model
net = Net()
# loss function
loss_fn = nn.CrossEntropyLoss()
# optimizer
learn_rate = 1e-2
optimizer = torch.optim.SGD(net.parameters(), lr=learn_rate)
# training bookkeeping
total_train_step = 0
# number of evaluations run so far
total_test_step = 0
# number of epochs
epoch = 10
# add TensorBoard
writer = SummaryWriter("../logs_train")
for i in range(epoch):
    print("---------- epoch {} ----------".format(i))
    # training phase
    net.train()
    for data in train_dataload:
        imgs, target = data
        # forward pass
        output = net(imgs)
        # compute the loss
        loss = loss_fn(output, target)
        # clear old gradients
        optimizer.zero_grad()
        # backward pass: compute the parameter gradients
        loss.backward()
        # update the parameters
        optimizer.step()
        if total_train_step % 100 == 0:
            print("step: {}, loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)
        total_train_step = total_train_step + 1
    # evaluation phase
    net.eval()
    total_test_loss = 0.0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataload:
            imgs, target = data
            output = net(imgs)
            loss = loss_fn(output, target)
            # accumulate the test loss
            total_test_loss = total_test_loss + loss
            # count correct predictions
            accuracy = (output.argmax(1) == target).sum()
            total_accuracy = total_accuracy + accuracy
    print("Total test loss: {}".format(total_test_loss))
    print("Test accuracy: {}".format(total_accuracy / test_data_size))
    writer.add_scalar("test_loss", total_test_loss.item(), total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step = total_test_step + 1
    # save a checkpoint each epoch (structure + parameters)
    torch.save(net, "net_{}.pth".format(i))
    # torch.save(net.state_dict(), "net_{}.pth".format(i))
    print("Model saved")
writer.close()
```
# GPU Acceleration
Some common GPU operations:
```python
import torch

# check whether a GPU is available on this machine
torch.cuda.is_available()
# number of GPUs on this machine
torch.cuda.device_count()
# name of a given GPU
torch.cuda.get_device_name(0)
# index of the currently selected GPU
torch.cuda.current_device()
# name of the currently selected GPU
torch.cuda.get_device_name(torch.cuda.current_device())
```
When training a PyTorch model, moving the model, input data, labels, and loss function onto the GPU speeds up training.
- Method 1: use the `.cuda()` method to move the model, inputs, labels, and loss function to the GPU
```python
import os
import torch

# select which GPU this process is allowed to see
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# check whether a GPU is available
if torch.cuda.is_available():
    print("GPU available")
net = Net()
# move the model to the GPU
net = net.cuda()
# move the input data to the GPU
imgs = imgs.cuda()
# move the labels to the GPU
target = target.cuda()
# move the loss function to the GPU
loss_fn = loss_fn.cuda()
```
- Method 2: use the `.to(device)` method to move the model, inputs, labels, and loss function to the GPU
```python
import os
import torch

# select which GPU this process is allowed to see
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# check whether a GPU is available
if torch.cuda.is_available():
    print("GPU available")
net = Net()
# move the model to the GPU
net = net.to("cuda")
# move the input data to the GPU
imgs = imgs.to("cuda")
# move the labels to the GPU
target = target.to("cuda")
# move the loss function to the GPU
loss_fn = loss_fn.to("cuda")
```
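A portable variant of method 2 picks the device once and falls back to the CPU when no GPU is present:
```python
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = net.to(device)
imgs = imgs.to(device)
target = target.to(device)
loss_fn = loss_fn.to(device)
```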
# Using the Trained Model
```python
import torch
import torchvision
from PIL import Image
from torch import nn
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
device = torch.device("cuda:0")
# load the image
image_path = "../images/img_1.png"
image = Image.open(image_path)
# PNG images have 4 channels (RGBA); convert to 3-channel RGB
image = image.convert('RGB')
print(image)
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((32, 32)),
                                            torchvision.transforms.ToTensor()])
image = transform(image)
print(image.shape)

# the class definition must be in scope to unpickle a model saved with method 1
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64*4*4, 64),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        return self.model(x)

model = torch.load("net_19.pth")
model.to(device)
print(model)
image = torch.reshape(image, (1, 3, 32, 32))
model.eval()
with torch.no_grad():
    image = image.to(device)
    output = model(image)
    print(output)
    print(output.argmax(1))
```
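To map the predicted index to a readable label, look it up in the CIFAR-10 class list (standard CIFAR-10 ordering):
```python
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
print(classes[output.argmax(1).item()])
```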