After reading up on common reasons why a training loss stays stubbornly high, I tried the candidate fixes one by one.
1. Adjust the batch size. The initial value of 512 seemed too large, so I retried with 32 and 16 (a minimal loader sketch follows below).
The loss still held steady around 0.32, with no meaningful change.
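For reference, a minimal sketch of how the batch size was varied. The dataset here is a random stand-in (the real features and labels are not shown in these notes); only the batch_size argument reflects the actual change.

import torch
from torch.utils.data import DataLoader, TensorDataset

# Stand-in data; the real dataset object is not shown in the original notes.
X = torch.randn(1024, 128)
y = torch.randint(0, 10, (1024,))
# Dropping batch_size from 512 to 32 (and then 16) mainly changes the
# gradient noise per step; a loss flat at 0.32 across all three values
# suggests the bottleneck is elsewhere.
train_loader = DataLoader(TensorDataset(X, y), batch_size=32, shuffle=True)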
2. Change the weight-matrix initialization. I tried zero, uniform, and normal initialization; the normal variant is shown in the class below, and the other two schemes are sketched after it.
import torch
import torch.nn.functional as F
from torch.nn import init


class MLP(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, num_labels, dropout=0.5):
        super(MLP, self).__init__()
        self.dropout = torch.nn.Dropout(dropout)
        # Define the hidden layers and the output layer
        self.hidden_1 = torch.nn.Linear(n_feature, n_hidden)
        init.normal_(self.hidden_1.weight, mean=0, std=0.01)  # normal init of hidden weights
        self.bn1 = torch.nn.BatchNorm1d(n_hidden)
        # Halving the width layer by layer reduces the parameter count,
        # which helps guard against overfitting
        self.hidden_2 = torch.nn.Linear(n_hidden, n_hidden // 2)
        init.normal_(self.hidden_2.weight, mean=0, std=0.01)  # normal init of hidden weights
        self.bn2 = torch.nn.BatchNorm1d(n_hidden // 2)
        self.hidden_3 = torch.nn.Linear(n_hidden // 2, n_hidden // 4)
        init.normal_(self.hidden_3.weight, mean=0, std=0.01)  # normal init of hidden weights
        self.bn3 = torch.nn.BatchNorm1d(n_hidden // 4)
        self.hidden_4 = torch.nn.Linear(n_hidden // 4, n_hidden // 8)
        init.normal_(self.hidden_4.weight, mean=0, std=0.01)  # normal init of hidden weights
        self.bn4 = torch.nn.BatchNorm1d(n_hidden // 8)
        self.out = torch.nn.Linear(n_hidden // 8, num_labels)

    # Forward pass: Linear -> ReLU -> BatchNorm -> Dropout for each block
    def forward(self, x):
        x = F.relu(self.hidden_1(x))
        x = self.dropout(self.bn1(x))
        x = F.relu(self.hidden_2(x))
        x = self.dropout(self.bn2(x))
        x = F.relu(self.hidden_3(x))
        x = self.dropout(self.bn3(x))
        x = F.relu(self.hidden_4(x))
        x = self.dropout(self.bn4(x))
        x = self.out(x)
        return x
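To cover the other two schemes from step 2, here is a hedged sketch of swapping the initializer; the helper apply_init, the scheme names, and the constructor arguments are my own, not from the original code.

def apply_init(layer, scheme):
    # Hypothetical helper: overwrite a Linear layer's weights in place.
    if scheme == "zeros":
        init.zeros_(layer.weight)
    elif scheme == "uniform":
        init.uniform_(layer.weight, a=-0.01, b=0.01)
    elif scheme == "normal":
        init.normal_(layer.weight, mean=0, std=0.01)

model = MLP(n_feature=128, n_hidden=256, num_labels=10)
for layer in (model.hidden_1, model.hidden_2, model.hidden_3, model.hidden_4):
    apply_init(layer, "zeros")  # or "uniform" / "normal"

Note that with all hidden weights at zero, the first layer initially outputs only its bias, so the forward pass ignores the input entirely; training from such a start is known to be badly handicapped, which alone could explain seeing no change under that variant.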
Essentially no improvement.
The fault likely lies in the model structure or in the feature extraction; a quick way to tell the two apart is sketched below.
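A standard sanity check (my sketch, not from the original notes) is to try to overfit a single tiny batch: if the loss refuses to approach zero even here, the model or training loop is broken; if it drops easily, the features or labels become the prime suspect. The dimensions and the random batch are stand-ins.

# Disable dropout for this check so the model can memorize freely.
model = MLP(n_feature=128, n_hidden=256, num_labels=10, dropout=0.0)
model.train()  # BatchNorm needs batch statistics
x = torch.randn(16, 128)           # replace with 16 real samples
y = torch.randint(0, 10, (16,))    # replace with their real labels
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.CrossEntropyLoss()
for step in range(500):
    optimizer.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()
print(loss.item())  # should get close to 0 if the model and loop are sound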