DNN自定义损失函数

对于分类问题, 我们通常会对交叉熵的损失函数进行优化, 统计学上还有一个极大似然的概念, 实际发生的事件的概率要最大.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
In [2]:
data = load_iris()
X = data.data
y = data.target
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2)
xtrain = torch.Tensor(xtrain)
xtest = torch.Tensor(xtest)
ytrain = torch.Tensor(ytrain).long()
ytest = torch.Tensor(ytest).long()
ytrain = nn.functional.one_hot(ytrain)
ytest = nn.functional.one_hot(ytest)
In [3]:
# 搭建网络
class Network(nn.Module):
    def __init__(self, input_size, class_num):
        super(Network, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.1)
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.1)
        self.fc3 = nn.Linear(128, class_num)
        self.out = nn.Softmax(1)
    
    def forward(self, data):
        out = self.fc1(data)
        out = self.relu1(out)
        out = self.dropout1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.dropout2(out)
        out = self.fc3(out)
        out = self.out(out)
        return out
In [4]:
model = Network(xtrain.shape[1], ytrain.shape[1])
In [5]:
def negative_log_likelihood(ypre, ytrue):
    """
    负对数似然损失
    :params ypre: 预测的概率分布
    :params ytrue: 实际的概率分布(一般标签是确定的, 所以传入的是独热码形式)
    """
    pre_p = ypre*ytrue
    pre_p = pre_p[pre_p!=0]
    return -torch.sum(torch.log(pre_p))/len(ypre)
In [6]:
# 初始化优化器
sgd = optim.SGD(model.parameters(), lr=0.0001)
In [9]:
epochs = 500

train_loss = []
test_loss = []

for epoch in range(epochs):
    ypre = model(xtrain)
    loss = negative_log_likelihood(ypre=ypre, ytrue=ytrain)
    loss.backward()
    sgd.step()
    if epoch%10 == 0:
        test_ypre = model(xtest)
        te_loss = negative_log_likelihood(test_ypre, ytest)
        print(f'Epoch[{epoch}]/Epoch[{epochs}] | train_loss:{round(loss.item(), 4)} | test_loss:{round(te_loss.item(), 4)}')
        train_loss.append(round(loss.item(), 4))
        test_loss.append(round(te_loss.item(), 4))
In [10]:
fig = plt.figure()
plt.plot(range(0, epochs, 10), train_loss, label='train_loss', color='red')
plt.plot(range(0, epochs, 10), test_loss, label='test_loss', color='blue')
plt.legend()
plt.show()
In [19]:
def acc(ypre, ytrue):
    """
    评估模型分类的正确性
    :params ypre: 预测标签(概率分布型数据)
    :params ytrue: 实际标签(独热码数据)
    """
    pre_label = torch.argmax(ypre, axis=1)
    true_label = torch.argmax(ytrue, axis=1)
    accur = torch.sum(pre_label==true_label)/len(true_label)
    return accur
In [20]:
# 训练集准确率
acc(model(xtrain), ytrain)
Out[20]:
tensor(0.9333)
In [21]:
# 测试集准确率
acc(model(xtest), ytest)
Out[21]:
tensor(0.9667)
In [ ]: