机器学习:XGBoost(Extreme Gradient Boosting)

准备数据

In [43]:
import dai
import pandas as pd
import numpy as np
from datetime import datetime

## 准备开始事件和结束时间
## 全部数据集
sd_data = '2020-01-01'
ed_data = datetime.now().date().strftime("%Y-%m-%d")

## 训练集
sd_tran = "2020-01-01"
ed_tran = "2022-12-31"

## 测试集
sd_test = "2023-01-01"
ed_test = ed_data

## 提取数据的SQL
sql = f"""

SELECT
    date,
    instrument,
    -- Label
    IF(m_lead(close, 5) > m_lead(open,1), 1, 0) AS label,
    -- Feature
    c_rank(a.total_market_cap),
    c_rank(a.float_market_cap),
    c_rank(pe_ttm),
    c_rank(ps_ttm),
FROM cn_stock_factors_base AS a JOIN cn_stock_valuation AS b USING (date, instrument) JOIN cn_stock_instruments AS c USING (date, instrument)
WHERE date BETWEEN '{sd_data}' AND '{ed_data}'
AND (name NOT LIKE '%ST%' AND name NOT LIKE '%退%')
AND st_status = 0
AND suspended = 0
AND list_sector < 4
AND is_sh50 = 1
QUALIFY COLUMNS(*) IS NOT NULL
ORDER BY date, instrument
    
"""

data = dai.query(sql, filters={'date':[sd_data, ed_data]}).df()
data_tran = data[(sd_tran <= data['date']) & (data['date'] <= ed_tran)]
data_test = data[(sd_test <= data['date']) & (data['date'] <= ed_test)]
In [44]:
x_tran = data_tran.drop(['date', 'instrument', 'label'], axis = 1)
x_test = data_test.drop(['date', 'instrument', 'label'], axis = 1)

y_tran = data_tran['label']
y_test = data_test['label']

模型

In [45]:
import xgboost as xgb
from sklearn.metrics import accuracy_score

model = xgb.XGBClassifier(
    objective='binary:logistic',
    num_boost_round=30,
    booster="gbtree", 
    learning_rate=0.01, 
    max_depth=5, 
    key_cols="date,instrument",
    group_col="date",
    nthread = 16,
    tree_method = "hist",
    n_estimators=1000
    ) 

model.fit(x_tran, y_tran) 
y_pred = model.predict_proba(x_test)[:,0]
data_pred = data_test.assign(pred_label = y_pred)

模型回测

In [46]:
data_pred.sort_values(['date','pred_label'], inplace=True, ascending=[True,False])
In [47]:
from bigmodule import M
from bigtrader.finance.commission import PerOrder

def m_initialize_bigquant_run(context):
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
    context.holding_days = 5
    context.target_hold_count = 3
    context.target_percent_per_instrument = 1.0 / context.target_hold_count

def m_before_trading_start_bigquant_run(context, data):
    pass

def m_handle_tick_bigquant_run(context, tick):
    pass

def m_handle_data_bigquant_run(context, data):
    if context.trading_day_index % context.holding_days != 0:
        return
    current_date = data.current_dt.strftime("%Y-%m-%d")
    current_day_data = context.data[context.data["date"] == current_date]
    current_day_data = current_day_data.iloc[:context.target_hold_count]
    target_hold_instruments = set(current_day_data["instrument"])
    current_hold_instruments = set(context.get_account_positions().keys())
    sell_set = current_hold_instruments - target_hold_instruments
    buy_set  = target_hold_instruments - current_hold_instruments
    for instrument in sell_set:
        context.order_target_percent(instrument, 0)
    for instrument in buy_set:
        context.order_target_percent(instrument, context.target_percent_per_instrument)

def m_handle_trade_bigquant_run(context, trade):
    pass

def m_handle_order_bigquant_run(context, order):
    pass

def m_after_trading_bigquant_run(context, data):
    pass

m = M.bigtrader.v14(
    data=data_pred,
    start_date='',
    end_date='',
    initialize=m_initialize_bigquant_run,
    before_trading_start=m_before_trading_start_bigquant_run,
    handle_tick=m_handle_tick_bigquant_run,
    handle_data=m_handle_data_bigquant_run,
    handle_trade=m_handle_trade_bigquant_run,
    handle_order=m_handle_order_bigquant_run,
    after_trading=m_after_trading_bigquant_run,
    capital_base=500000,
    frequency='daily',
    product_type='股票',
    before_start_days=0,
    volume_limit=1,
    order_price_field_buy='open',
    order_price_field_sell='close',
    benchmark='000001.SH',
    plot_charts=True,
    disable_cache=False,
    debug=False,
    backtest_only=False,
    m_cached=False
)
BigTrader(高性能回测/交易)
收益:年化收益越大得分越高
抗风险:回撤越低得分越高
分散度:持仓和交易股票数越多得分越高,股票数超过10只后趋于稳定
稳定性:超额收益越高,波动越小,得分越高
模拟时长:模拟时间越长得分越高,超过100天后得分趋于稳定

+10.58%
收益率
  • 年化收益率+8.17%
  • 基准收益率-3.51%
  • 阿尔法0.11
  • 贝塔0.77
  • 夏普比率0.41
  • 胜率0.54
  • 盈亏比1.22
  • 收益波动率15.87%
  • 信息比率0.06
  • 最大回撤25.97%
评分规则
日期 时间 证券代码 证券名称 买/卖 数量 成交价 成交金额 平仓盈亏 交易费用
Loading... (need help?)
日期 证券代码 证券名称 数量 持仓均价 收盘价 持仓市值 持仓占比 收益
Loading... (need help?)
时间 级别 内容
Loading... (need help?)