Machine Learning: DNN

In [28]:
import dai
import pandas as pd
import numpy as np
import time
from datetime import datetime

## Prepare the start and end dates
## Full dataset
sd_data = '2020-01-01'
ed_data = datetime.now().date().strftime("%Y-%m-%d")

## Training set
sd_tran = "2020-01-01"
ed_tran = "2022-12-31"

## Test set
sd_test = "2023-01-01"
ed_test = ed_data

## SQL for extracting the data
sql = f"""

SELECT
    date,
    instrument,
    -- Label
    IF(m_lead(close, 5) > m_lead(open,1), 1, 0) AS label,
    -- Feature
    LOG(float_market_cap),
    pe_ttm,
    ps_ttm,
    trading_days,
    close,
    volume,
    turn
FROM cn_stock_factors
WHERE date BETWEEN '{sd_data}' AND '{ed_data}'
AND st_status = 0
AND suspended = 0
AND list_sector < 4
AND is_sh50 = 1
QUALIFY COLUMNS(*) IS NOT NULL
ORDER BY date, instrument
    
"""

data = dai.query(sql, filters={'date':[sd_data, ed_data]}).df()
data_tran = data[(sd_tran <= data['date']) & (data['date'] <= ed_tran)]
data_test = data[(sd_test <= data['date']) & (data['date'] <= ed_test)]

x_tran = data_tran.drop(['date', 'instrument', 'label'], axis=1)
y_tran = data_tran['label']
x_test = data_test.drop(['date', 'instrument', 'label'], axis=1)
y_test = data_test['label']

x_tran_numpy = np.array(x_tran)
y_tran_numpy = np.array(y_tran)
x_test_numpy = np.array(x_test)
y_test_numpy = np.array(y_test)
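The raw factors span very different scales (trading_days is in the thousands while turn is a small fraction), and neural networks usually train more stably on standardized inputs. The sketch below is an optional addition, not part of the original pipeline; it assumes scikit-learn is available and uses new hypothetical names x_tran_scaled / x_test_scaled.

# Optional preprocessing sketch (assumption, not in the original notebook):
# standardize features using statistics from the training window only.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_tran_scaled = scaler.fit_transform(x_tran_numpy)  # fit on the training set
x_test_scaled = scaler.transform(x_test_numpy)      # reuse the training statistics on the test set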
In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
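Because the network below uses dropout and random weight initialization, results differ slightly from run to run. A minimal reproducibility sketch (an addition, not in the original code; the seed value is arbitrary):

# Fix random seeds so weight initialization and dropout masks are repeatable.
import random
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)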
In [30]:
# Convert the training/test splits to tensors and build the training DataLoader
x_tran_torch = torch.Tensor(x_tran_numpy)
y_tran_torch = torch.Tensor(y_tran_numpy.reshape([-1, 1]))

x_test_torch = torch.Tensor(x_test_numpy).to(device)
y_test_torch = torch.Tensor(y_test_numpy.reshape([-1, 1]))

batch_size = 128
tran_dataset = TensorDataset(x_tran_torch, y_tran_torch)
tran_datalod = DataLoader(tran_dataset, batch_size=batch_size)
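Note that this DataLoader iterates the training rows in date order. Shuffling the training batches each epoch is the usual default for mini-batch gradient descent; a variant is sketched below (an alternative setting, not what the original cell uses).

# Variant (assumption, not the original setting): shuffle training batches each epoch.
tran_datalod_shuffled = DataLoader(tran_dataset, batch_size=batch_size, shuffle=True)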
In [31]:
class DNN(nn.Module):
    
    def __init__(self, input_dim):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 16)
        self.fc5 = nn.Linear(16, 1)
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()
    
    def forward(self, data):
        # Layer 1
        out = self.fc1(data)
        out = self.relu(out)    # activation
        out = self.dropout(out) # dropout
        
        # Layer 2
        out = self.fc2(out)
        out = self.relu(out)
        out = self.dropout(out)

        # Layer 3
        out = self.fc3(out)
        out = self.relu(out)
        out = self.dropout(out)

        # Layer 4
        out = self.fc4(out)
        out = self.relu(out)
        out = self.dropout(out)

        # Output layer
        out = self.fc5(out)
        return out
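A quick shape check can confirm the network is wired as intended before training; the snippet below is illustrative only and uses throwaway names.

# Sanity check (illustrative): a fake mini-batch of 4 samples with the 7 features
# selected in the SQL above should produce one score per sample.
_check_model = DNN(input_dim=7)
_dummy_batch = torch.randn(4, 7)
print(_check_model(_dummy_batch).shape)  # expected: torch.Size([4, 1])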
In [32]:
# Number of training epochs
num_epochs = 100

# Build the model
input_dim = x_tran_torch.shape[1] # number of factors
model = DNN(input_dim).to(device)

# Optimizer
learning_rate = 0.01 # learning rate
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Loss function
criterion = nn.MSELoss()

t0 = time.time()

# Start training
for i in range(num_epochs):
    
    for idx, (x, y) in enumerate(tran_datalod):

        # Move the batch to the device
        x, y = x.to(device), y.to(device)
        
        # Forward pass
        out = model(x)
        loss = criterion(out, y)

        # Backward pass
        optimizer.zero_grad() # clear the previous gradients
        loss.backward()       # compute the current gradients
        optimizer.step()      # update the parameters from the gradients

    if i % 1 == 0:  # print progress every epoch
        print(f"Epoch: {i+1}, Loss: {round(loss.item(), 8)}, elapsed {round(time.time()-t0, 4)} s")
    
print()
print("Training finished")
print('Total training time:', f"{round(time.time()-t0, 4)}", "s")

## Save the model
# torch.save(model.state_dict(), "/home/aiuser/work/2023年训练营/DNN_model.pkl")
# print('Model saved successfully!')
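The label is binary (0/1) while the loss above is mean squared error on an unbounded linear output. A common alternative for this kind of target is binary cross-entropy on the raw logit, sketched below; this is not what the cell above uses, and only the criterion would change.

# Alternative loss (not used above): binary cross-entropy with logits for a 0/1 label.
# The training loop stays the same; predictions become probabilities via a sigmoid.
criterion_bce = nn.BCEWithLogitsLoss()
# prob = torch.sigmoid(model(x))  # at inference time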
In [33]:
## Load the model
# model = DNN(input_dim)
# model.load_state_dict(torch.load('/home/aiuser/work/2023年训练营/DNN_model.pkl', map_location=torch.device('cpu')))
In [34]:
model.eval()  # disable dropout for inference
with torch.no_grad():
    y_pred_torch = model(x_test_torch).cpu()
y_pred = y_pred_torch.numpy().reshape([1, -1])[0]

data_pred = data_test.assign(pred_label = y_pred)
data_pred.sort_values(['date','pred_label'], inplace=True, ascending=[True,False])
data_pred
Out[34]:
date instrument label log(float_market_cap) pe_ttm ps_ttm trading_days close volume turn pred_label
36379 2023-01-03 600010.SH 0 10.786304 168.770474 1.001903 5298 16.211348 197091166 0.006222 0.468404
36380 2023-01-03 600028.SH 0 11.621704 7.776415 0.165244 5195 17.269679 85476673 0.000899 0.468404
36381 2023-01-03 600030.SH 1 11.354487 13.392450 4.303416 4857 122.773864 69294429 0.006096 0.468404
36382 2023-01-03 600031.SH 1 11.126587 43.803776 1.725819 4743 799.490524 44506741 0.005254 0.468404
36383 2023-01-03 600036.SH 1 11.889434 7.113780 2.749718 5039 203.484463 60626616 0.002939 0.468404
... ... ... ... ... ... ... ... ... ... ... ...
51924 2024-04-17 603986.SH 0 10.701926 128.128239 8.778686 1860 426.692962 34327549 0.051660 0.468404
51925 2024-04-17 688041.SH 0 10.847808 147.279269 30.944786 406 80.079301 19643822 0.022322 0.468404
51926 2024-04-17 688111.SH 0 11.125698 101.360726 29.316882 1071 291.450680 2656171 0.005752 0.468404
51927 2024-04-17 688599.SH 0 10.682526 7.575749 0.445873 935 22.773000 12190078 0.005593 0.468404
51928 2024-04-17 688981.SH 0 10.915918 68.839443 7.336944 911 41.750000 18126686 0.009185 0.468404

15550 rows × 11 columns
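Since data_pred keeps the realized label next to the model score, a quick out-of-sample check is straightforward. The snippet below is an illustrative addition and assumes scikit-learn is available; because pred_label is a continuous score, a rank-based metric such as AUC is more informative than a fixed threshold.

# Illustrative evaluation (assumption: scikit-learn is installed).
from sklearn.metrics import roc_auc_score
print("Test AUC:", roc_auc_score(data_pred["label"], data_pred["pred_label"]))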

In [37]:
from bigmodule import M
from bigtrader.finance.commission import PerOrder

def m_initialize_bigquant_run(context):
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
    context.holding_days = 5
    context.target_hold_count = 3
    context.target_percent_per_instrument = 1.0 / context.target_hold_count

def m_before_trading_start_bigquant_run(context, data):
    pass

def m_handle_tick_bigquant_run(context, tick):
    pass

def m_handle_data_bigquant_run(context, data):
    # Rebalance only every `holding_days` trading days
    if context.trading_day_index % context.holding_days != 0:
        return
    # Take today's top-ranked instruments (data_pred is sorted by pred_label, descending)
    current_date = data.current_dt.strftime("%Y-%m-%d")
    current_day_data = context.data[context.data["date"] == current_date]
    current_day_data = current_day_data.iloc[:context.target_hold_count]
    target_hold_instruments = set(current_day_data["instrument"])
    current_hold_instruments = set(context.get_account_positions().keys())
    # Sell holdings that are no longer targeted; buy targets not yet held
    sell_set = current_hold_instruments - target_hold_instruments
    buy_set  = target_hold_instruments - current_hold_instruments
    for instrument in sell_set:
        context.order_target_percent(instrument, 0)
    for instrument in buy_set:
        context.order_target_percent(instrument, context.target_percent_per_instrument)

def m_handle_trade_bigquant_run(context, trade):
    pass

def m_handle_order_bigquant_run(context, order):
    pass

def m_after_trading_bigquant_run(context, data):
    pass

m = M.bigtrader.v14(
    data=data_pred,
    start_date='',
    end_date='',
    initialize=m_initialize_bigquant_run,
    before_trading_start=m_before_trading_start_bigquant_run,
    handle_tick=m_handle_tick_bigquant_run,
    handle_data=m_handle_data_bigquant_run,
    handle_trade=m_handle_trade_bigquant_run,
    handle_order=m_handle_order_bigquant_run,
    after_trading=m_after_trading_bigquant_run,
    capital_base=500000,
    frequency='daily',
    product_type='股票',
    before_start_days=0,
    volume_limit=1,
    order_price_field_buy='open',
    order_price_field_sell='close',
    benchmark='000001.SH',
    plot_charts=True,
    disable_cache=False,
    debug=False,
    backtest_only=False,
    m_cached=False
)
BigTrader (high-performance backtesting/trading)

Cumulative return: +9.04%
  • Annualized return: +6.96%
  • Benchmark return: -1.45%
  • Alpha: 0.09
  • Beta: 0.89
  • Sharpe ratio: 0.33
  • Win rate: 0.0
  • Profit/loss ratio: 0.0
  • Return volatility: 16.52%
  • Information ratio: 0.05
  • Max drawdown: 18.6%
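As a rough consistency check, the annualized figure lines up with the cumulative return over the roughly 470-day backtest window (2023-01-03 to 2024-04-17):

# Sanity check (illustrative arithmetic only).
years = 470 / 365
print((1 + 0.0904) ** (1 / years) - 1)  # ≈ 0.07, consistent with the reported +6.96%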