Machine Learning: DNN

In [28]:
import dai
import pandas as pd
import numpy as np
import time
from datetime import datetime

## Prepare the start and end dates
## Full dataset
sd_data = '2020-01-01'
ed_data = datetime.now().date().strftime("%Y-%m-%d")

## Training set
sd_tran = "2020-01-01"
ed_tran = "2022-12-31"

## Test set
sd_test = "2023-01-01"
ed_test = ed_data

## SQL for extracting the data
sql = f"""

SELECT
    date,
    instrument,
    -- Label
    IF(m_lead(close, 5) > m_lead(open,1), 1, 0) AS label,
    -- Feature
    LOG(float_market_cap),
    pe_ttm,
    ps_ttm,
    trading_days,
    close,
    volume,
    turn
FROM cn_stock_factors
WHERE date BETWEEN '{sd_data}' AND '{ed_data}'
AND st_status = 0
AND suspended = 0
AND list_sector < 4
AND is_sh50 = 1
QUALIFY COLUMNS(*) IS NOT NULL
ORDER BY date, instrument
    
"""

data = dai.query(sql, filters={'date':[sd_data, ed_data]}).df()
data_tran = data[(sd_tran <= data['date']) & (data['date'] <= ed_tran)]
data_test = data[(sd_test <= data['date']) & (data['date'] <= ed_test)]

x_tran = data_tran.drop(['date', 'instrument', 'label'], axis=1)
y_tran = data_tran['label']
x_test = data_test.drop(['date', 'instrument', 'label'], axis=1)
y_test = data_test['label']

x_tran_numpy = np.array(x_tran)
y_tran_numpy = np.array(y_tran)
x_test_numpy = np.array(x_test)
y_test_numpy = np.array(y_test)
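The raw factors span very different scales (trading_days is in the thousands while turn is a small fraction), and neural networks usually train more stably on standardized inputs. The sketch below is an optional addition, not part of the original pipeline; it assumes scikit-learn is available and uses new hypothetical names x_tran_scaled / x_test_scaled.

# Optional preprocessing sketch (assumption, not in the original notebook):
# standardize features using statistics from the training window only.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_tran_scaled = scaler.fit_transform(x_tran_numpy)  # fit on the training set
x_test_scaled = scaler.transform(x_test_numpy)      # reuse the training statistics on the test set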
In [29]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
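Because the network below uses dropout and random weight initialization, results differ slightly from run to run. A minimal reproducibility sketch (an addition, not in the original code; the seed value is arbitrary):

# Fix random seeds so weight initialization and dropout masks are repeatable.
import random
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)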
In [30]:
# Convert the training/test splits to tensors and build the training DataLoader
x_tran_torch = torch.Tensor(x_tran_numpy)
y_tran_torch = torch.Tensor(y_tran_numpy.reshape([-1, 1]))

x_test_torch = torch.Tensor(x_test_numpy).to(device)
y_test_torch = torch.Tensor(y_test_numpy.reshape([-1, 1]))

batch_size = 128
tran_dataset = TensorDataset(x_tran_torch, y_tran_torch)
tran_datalod = DataLoader(tran_dataset, batch_size=batch_size)
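Note that this DataLoader iterates the training rows in date order. Shuffling the training batches each epoch is the usual default for mini-batch gradient descent; a variant is sketched below (an alternative setting, not what the original cell uses).

# Variant (assumption, not the original setting): shuffle training batches each epoch.
tran_datalod_shuffled = DataLoader(tran_dataset, batch_size=batch_size, shuffle=True)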
In [31]:
class DNN(nn.Module):
    
    def __init__(self, input_dim):
        super(DNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 16)
        self.fc5 = nn.Linear(16, 1)
        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()
    
    def forward(self, data):
        # Layer 1
        out = self.fc1(data)
        out = self.relu(out)    # activation
        out = self.dropout(out) # dropout
        
        # Layer 2
        out = self.fc2(out)
        out = self.relu(out)
        out = self.dropout(out)

        # Layer 3
        out = self.fc3(out)
        out = self.relu(out)
        out = self.dropout(out)

        # Layer 4
        out = self.fc4(out)
        out = self.relu(out)
        out = self.dropout(out)

        # Output layer
        out = self.fc5(out)
        return out
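A quick shape check can confirm the network is wired as intended before training; the snippet below is illustrative only and uses throwaway names.

# Sanity check (illustrative): a fake mini-batch of 4 samples with the 7 features
# selected in the SQL above should produce one score per sample.
_check_model = DNN(input_dim=7)
_dummy_batch = torch.randn(4, 7)
print(_check_model(_dummy_batch).shape)  # expected: torch.Size([4, 1])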
In [32]:
# Number of training epochs
num_epochs = 100

# Build the model
input_dim = x_tran_torch.shape[1] # number of factors
model = DNN(input_dim).to(device)

# Optimizer
learning_rate = 0.01 # learning rate
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Loss function
criterion = nn.MSELoss()

t0 = time.time()

# Start training
for i in range(num_epochs):
    
    for idx, (x, y) in enumerate(tran_datalod):

        # Move the batch to the device
        x, y = x.to(device), y.to(device)
        
        # Forward pass
        out = model(x)
        loss = criterion(out, y)

        # Backward pass
        optimizer.zero_grad() # clear the previous gradients
        loss.backward()       # compute the current gradients
        optimizer.step()      # update the parameters from the gradients

    if i % 1 == 0:  # print progress every epoch
        print(f"Epoch: {i+1}, Loss: {round(loss.item(), 8)}, elapsed {round(time.time()-t0, 4)} s")
    
print()
print("Training finished")
print('Total training time:', f"{round(time.time()-t0, 4)}", "s")

## Save the model
# torch.save(model.state_dict(), "/home/aiuser/work/2023年训练营/DNN_model.pkl")
# print('Model saved successfully!')
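The label is binary (0/1) while the loss above is mean squared error on an unbounded linear output. A common alternative for this kind of target is binary cross-entropy on the raw logit, sketched below; this is not what the cell above uses, and only the criterion would change.

# Alternative loss (not used above): binary cross-entropy with logits for a 0/1 label.
# The training loop stays the same; predictions become probabilities via a sigmoid.
criterion_bce = nn.BCEWithLogitsLoss()
# prob = torch.sigmoid(model(x))  # at inference time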
In [33]:
## Load the model
# model = DNN(input_dim)
# model.load_state_dict(torch.load('/home/aiuser/work/2023年训练营/DNN_model.pkl', map_location=torch.device('cpu')))
In [34]:
model.eval()  # disable dropout for inference
with torch.no_grad():
    y_pred_torch = model(x_test_torch).cpu()
y_pred = y_pred_torch.numpy().reshape([1, -1])[0]

data_pred = data_test.assign(pred_label = y_pred)
data_pred.sort_values(['date','pred_label'], inplace=True, ascending=[True,False])
data_pred
Out[34]:
date instrument label log(float_market_cap) pe_ttm ps_ttm trading_days close volume turn pred_label
36379 2023-01-03 600010.SH 0 10.786304 168.770474 1.001903 5298 16.211348 197091166 0.006222 0.468404
36380 2023-01-03 600028.SH 0 11.621704 7.776415 0.165244 5195 17.269679 85476673 0.000899 0.468404
36381 2023-01-03 600030.SH 1 11.354487 13.392450 4.303416 4857 122.773864 69294429 0.006096 0.468404
36382 2023-01-03 600031.SH 1 11.126587 43.803776 1.725819 4743 799.490524 44506741 0.005254 0.468404
36383 2023-01-03 600036.SH 1 11.889434 7.113780 2.749718 5039 203.484463 60626616 0.002939 0.468404
... ... ... ... ... ... ... ... ... ... ... ...
51924 2024-04-17 603986.SH 0 10.701926 128.128239 8.778686 1860 426.692962 34327549 0.051660 0.468404
51925 2024-04-17 688041.SH 0 10.847808 147.279269 30.944786 406 80.079301 19643822 0.022322 0.468404
51926 2024-04-17 688111.SH 0 11.125698 101.360726 29.316882 1071 291.450680 2656171 0.005752 0.468404
51927 2024-04-17 688599.SH 0 10.682526 7.575749 0.445873 935 22.773000 12190078 0.005593 0.468404
51928 2024-04-17 688981.SH 0 10.915918 68.839443 7.336944 911 41.750000 18126686 0.009185 0.468404

15550 rows × 11 columns
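Since data_pred keeps the realized label next to the model score, a quick out-of-sample check is straightforward. The snippet below is an illustrative addition and assumes scikit-learn is available; because pred_label is a continuous score, a rank-based metric such as AUC is more informative than a fixed threshold.

# Illustrative evaluation (assumption: scikit-learn is installed).
from sklearn.metrics import roc_auc_score
print("Test AUC:", roc_auc_score(data_pred["label"], data_pred["pred_label"]))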

In [37]:
from bigmodule import M
from bigtrader.finance.commission import PerOrder

def m_initialize_bigquant_run(context):
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
    context.holding_days = 5
    context.target_hold_count = 3
    context.target_percent_per_instrument = 1.0 / context.target_hold_count

def m_before_trading_start_bigquant_run(context, data):
    pass

def m_handle_tick_bigquant_run(context, tick):
    pass

def m_handle_data_bigquant_run(context, data):
    # Rebalance only every `holding_days` trading days
    if context.trading_day_index % context.holding_days != 0:
        return
    # Take today's top-ranked instruments (data_pred is sorted by pred_label, descending)
    current_date = data.current_dt.strftime("%Y-%m-%d")
    current_day_data = context.data[context.data["date"] == current_date]
    current_day_data = current_day_data.iloc[:context.target_hold_count]
    target_hold_instruments = set(current_day_data["instrument"])
    current_hold_instruments = set(context.get_account_positions().keys())
    # Sell holdings that are no longer targeted; buy targets not yet held
    sell_set = current_hold_instruments - target_hold_instruments
    buy_set  = target_hold_instruments - current_hold_instruments
    for instrument in sell_set:
        context.order_target_percent(instrument, 0)
    for instrument in buy_set:
        context.order_target_percent(instrument, context.target_percent_per_instrument)

def m_handle_trade_bigquant_run(context, trade):
    pass

def m_handle_order_bigquant_run(context, order):
    pass

def m_after_trading_bigquant_run(context, data):
    pass

m = M.bigtrader.v14(
    data=data_pred,
    start_date='',
    end_date='',
    initialize=m_initialize_bigquant_run,
    before_trading_start=m_before_trading_start_bigquant_run,
    handle_tick=m_handle_tick_bigquant_run,
    handle_data=m_handle_data_bigquant_run,
    handle_trade=m_handle_trade_bigquant_run,
    handle_order=m_handle_order_bigquant_run,
    after_trading=m_after_trading_bigquant_run,
    capital_base=500000,
    frequency='daily',
    product_type='股票',
    before_start_days=0,
    volume_limit=1,
    order_price_field_buy='open',
    order_price_field_sell='close',
    benchmark='000001.SH',
    plot_charts=True,
    disable_cache=False,
    debug=False,
    backtest_only=False,
    m_cached=False
)
BigTrader (high-performance backtesting/trading)

Cumulative return: +9.04%
  • Annualized return: +6.96%
  • Benchmark return: -1.45%
  • Alpha: 0.09
  • Beta: 0.89
  • Sharpe ratio: 0.33
  • Win rate: 0.0
  • Profit/loss ratio: 0.0
  • Return volatility: 16.52%
  • Information ratio: 0.05
  • Max drawdown: 18.6%
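As a rough consistency check, the annualized figure lines up with the cumulative return over the roughly 470-day backtest window (2023-01-03 to 2024-04-17):

# Sanity check (illustrative arithmetic only).
years = 470 / 365
print((1 + 0.0904) ** (1 / years) - 1)  # ≈ 0.07, consistent with the reported +6.96%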