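# NOTE: Gradients are only created by back-propagation. When writing the
# training loop from scratch, check whether each parameter's .grad exists
# (it is still None) before trying to zero it ahead of the first backward pass.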
import torch
import numpy as np
import torch.utils.data as Data
import torchvision.datasets
import torchvision.transforms as transforms
import sys
sys.path.append("路径")
import d2lzh_pytorch as d2l
'''
--------------------------------------------------获取和读取数据
'''
# Mini-batch size used by both data loaders.
batch_size = 256

# Arguments shared by the training and test splits of Fashion-MNIST.
_dataset_kwargs = dict(root='路径', download=True, transform=transforms.ToTensor())
train_mnist = torchvision.datasets.FashionMNIST(train=True, **_dataset_kwargs)
test_mnist = torchvision.datasets.FashionMNIST(train=False, **_dataset_kwargs)

# Only the training loader is shuffled; the test loader keeps dataset order.
train_iter = Data.DataLoader(dataset=train_mnist, batch_size=batch_size, shuffle=True)
test_iter = Data.DataLoader(dataset=test_mnist, batch_size=batch_size, shuffle=False)
'''
--------------------------------------------------定义模型参数
'''
# Layer widths: flattened input -> hidden layer -> output classes.
num_inputs, num_hidden, num_outputs = 784, 256, 10

# Weights are drawn from a zero-mean normal via NumPy, biases start at zero.
# Every parameter is created as a float32 leaf tensor that tracks gradients.
# NOTE(review): w2 uses std 0.1 while w1 uses std 0.01 - confirm the asymmetry
# is intended.
w1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hidden)),
    dtype=torch.float,
    requires_grad=True,
)
b1 = torch.zeros(num_hidden, dtype=torch.float, requires_grad=True)
w2 = torch.tensor(
    np.random.normal(loc=0, scale=0.1, size=(num_hidden, num_outputs)),
    dtype=torch.float,
    requires_grad=True,
)
b2 = torch.zeros(num_outputs, dtype=torch.float, requires_grad=True)

# Order matters to anything that indexes into this list.
params = [w1, b1, w2, b2]
'''
---------------------------------------------定义激活函数
'''
def relu(X):
    """Apply the ReLU activation element-wise.

    Args:
        X: Input tensor.

    Returns:
        A tensor in which every entry of ``X`` below zero is replaced by zero.
    """
    zero = torch.tensor(0.0)  # 0-dim tensor, broadcast against X
    return torch.max(X, zero)
'''
---------------------------------------------------定义模型
'''
def net(X):
    """Run the forward pass of the one-hidden-layer perceptron.

    Args:
        X: Input batch; it is reshaped to ``(-1, num_inputs)`` before use.

    Returns:
        Unnormalised class scores, one row per sample (no softmax applied).
    """
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ w1 + b1)
    return hidden @ w2 + b2
'''
-----------------------------------------------------定义损失函数
'''
# Cross-entropy on raw scores; "mean" (the library default) averages over the batch.
loss = torch.nn.CrossEntropyLoss(reduction="mean")
'''
------------------------------------------------------softmax操作,用于训练模型中训练集准确率调用
'''
def softmax(X):
    """Compute a numerically stable row-wise softmax.

    Args:
        X: 2-D tensor of scores; the softmax is taken along ``dim=1``.

    Returns:
        Tensor of the same shape as ``X`` whose rows are non-negative and
        sum to 1.
    """
    # Softmax is invariant to a per-row shift. Subtracting the row maximum
    # keeps every exponent <= 0, so exp() cannot overflow to inf and turn the
    # division into inf / inf = NaN for large scores.
    shifted = X - X.max(dim=1, keepdim=True)[0]
    X_exp = shifted.exp()
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp / partition
'''
----------------------------------------------------测试集准确率函数,训练模型中测试集准确率调用
'''
def evaluate_accuracy(test_data):
    """Return the fraction of samples that ``net`` classifies correctly.

    Args:
        test_data: Iterable yielding ``(X, y)`` batches, where ``X`` is fed to
            ``net`` and ``y`` holds the integer class label of each sample.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` if ``test_data`` yields no
        samples.
    """
    correct, total = 0.0, 0
    # Evaluation never calls backward(), so skip building autograd graphs.
    with torch.no_grad():
        for X, y in test_data:
            # Softmax is monotonic within a row, so the argmax of the raw
            # scores is the predicted class; skipping exp() also avoids
            # overflow on large scores.
            predicted = net(X).argmax(dim=1)
            correct += (predicted == y).float().sum().item()
            total += y.shape[0]
    # Guard against an empty iterable instead of dividing by zero.
    return correct / total if total else 0.0
'''
------------------------------------------------------训练模型
'''
# Number of full passes over the training data.
num_epochs = 5
# NOTE(review): a learning rate of 100 looks very large. Presumably d2l.sgd
# divides the step by batch_size while the loss is already batch-averaged -
# confirm against d2lzh_pytorch.
lr = 100
def train():
    """Train ``net`` with mini-batch SGD and print metrics after every epoch.

    Reads the module-level ``num_epochs``, ``lr``, ``batch_size``, ``params``,
    ``train_iter`` and ``test_iter``, and updates ``params`` in place through
    ``d2l.sgd``. For each epoch it prints the mean batch loss, the training
    accuracy and the test accuracy.

    Training accuracy is taken from the same forward pass that produced the
    loss (i.e. before that batch's parameter update), so no second forward
    pass is needed.
    """
    for epoch in range(num_epochs):
        train_l, train_acc, n, num = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y)
            # backward() accumulates into .grad, so clear stale gradients
            # first. .grad is None until a parameter has seen its first
            # backward pass, hence the per-parameter check.
            for param in params:
                if param.grad is not None:
                    param.grad.data.zero_()
            l.backward()
            d2l.sgd(params, lr, batch_size)
            train_l += l.item()
            # argmax of the raw scores equals argmax of their softmax.
            predicted = y_hat.detach().argmax(dim=1)
            train_acc += (predicted == y).float().sum().item()
            n += y.shape[0]
            num += 1
        test_acc = evaluate_accuracy(test_iter)
        # max(..., 1) avoids a ZeroDivisionError when train_iter is empty.
        mean_loss = train_l / max(num, 1)
        mean_acc = train_acc / max(n, 1)
        print(f'epoch {epoch + 1:d}, loss {mean_loss:.4f}, '
              f'train_acc {mean_acc:.3f}, test_acc {test_acc:.3f}')
# Start training. There is no __main__ guard, so this also runs on import.
train()