因子:样例因子(11个)
因子是否标准化:是
标注:未来5日收益(不做离散化)
算法:LSTM
类型:回归问题
训练集:2020-01-01 至 2021-06-30
测试集:2021-07-01 至 2021-12-31
选股依据:根据预测值降序排序买入
每日买入股票数:15
持仓天数:5
# 本代码由可视化策略环境自动生成 2022年3月14日 11:11
# 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
# Python entry point: input_1/2/3 are the three input ports, data_1/2/3 the three output ports.
def m4_run_bigquant_run(input_1, input_2, input_3):
    """Reshape the flattened training features stored under ``'x'`` into 3-D.

    Args:
        input_1: Data source whose pickle is a mapping that holds the feature
            matrix under the key ``'x'``.
        input_2: Data source whose pickle is the feature list; only its
            length is used.
        input_3: Unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the re-pickled mapping, with ``'x'``
        reshaped to ``(rows, feature_len, columns // feature_len)``.
    """
    dataset = input_1.read_pickle()
    feature_len = len(input_2.read_pickle())
    flat_x = dataset['x']  # assumes a 2-D NumPy array (rows, columns) -- TODO confirm
    rows, columns = flat_x.shape[0], flat_x.shape[1]
    # Floor division keeps the last dimension an exact integer instead of
    # round-tripping through a float.
    # NOTE(review): the target layout is (rows, features, steps). If the
    # upstream flattening is time-major, (rows, steps, features) would be the
    # matching layout -- confirm against the windowing module before changing,
    # because the model input shape must be changed together with it.
    dataset['x'] = flat_x.reshape(rows, feature_len, columns // feature_len)
    return Outputs(data_1=DataSource.write_pickle(dataset))
# Optional post-processing hook. It receives the main function's outputs and may
# transform them or return a friendlier format. Its result is not cached.
def m4_post_run_bigquant_run(outputs):
    """Return ``outputs`` unchanged (no post-processing is applied)."""
    return outputs
# Python entry point: input_1/2/3 are the three input ports, data_1/2/3 the three output ports.
def m8_run_bigquant_run(input_1, input_2, input_3):
    """Reshape the flattened prediction features stored under ``'x'`` into 3-D.

    Args:
        input_1: Data source whose pickle is a mapping that holds the feature
            matrix under the key ``'x'``.
        input_2: Data source whose pickle is the feature list; only its
            length is used.
        input_3: Unused.

    Returns:
        ``Outputs`` whose ``data_1`` is the re-pickled mapping, with ``'x'``
        reshaped to ``(rows, feature_len, columns // feature_len)``.
    """
    dataset = input_1.read_pickle()
    feature_len = len(input_2.read_pickle())
    flat_x = dataset['x']  # assumes a 2-D NumPy array (rows, columns) -- TODO confirm
    rows, columns = flat_x.shape[0], flat_x.shape[1]
    # Floor division keeps the last dimension an exact integer instead of
    # round-tripping through a float.
    # NOTE(review): the target layout is (rows, features, steps). If the
    # upstream flattening is time-major, (rows, steps, features) would be the
    # matching layout -- confirm against the windowing module before changing,
    # because the model input shape must be changed together with it.
    dataset['x'] = flat_x.reshape(rows, feature_len, columns // feature_len)
    return Outputs(data_1=DataSource.write_pickle(dataset))
# Optional post-processing hook. It receives the main function's outputs and may
# transform them or return a friendlier format. Its result is not cached.
def m8_post_run_bigquant_run(outputs):
    """Return ``outputs`` unchanged (no post-processing is applied)."""
    return outputs
# User-defined custom layers have to be registered in this dict, for example:
# {
#     "MyLayer": MyLayer
# }
# It is empty here: no custom layers are registered.
m5_custom_objects_bigquant_run = dict()
# Python entry point: input_1/2/3 are the three input ports, data_1/2/3 the three output ports.
def m24_run_bigquant_run(input_1, input_2, input_3):
    """Attach predictions to their (date, instrument) rows and rank them.

    Args:
        input_1: Data source whose pickle is the prediction array; column 0
            is used as the score.
        input_2: Data source whose DataFrame provides the ``instrument`` and
            ``date`` columns.
        input_3: Unused.

    Returns:
        ``Outputs`` whose ``data_1`` is a DataFrame with columns
        ``pred_label``, ``instrument`` and ``date``, sorted by date ascending
        and, within a date, by ``pred_label`` descending. ``data_2`` and
        ``data_3`` are ``None``.
    """
    pred_label = input_1.read_pickle()
    rows = input_2.read_df()
    # NOTE(review): predictions are paired with rows purely by position, so
    # both inputs presumably share length and order -- verify against the
    # upstream windowing step.
    ranked = pd.DataFrame({
        'pred_label': pred_label[:, 0],
        'instrument': rows.instrument,
        'date': rows.date,
    })
    ranked = ranked.sort_values(['date', 'pred_label'], ascending=[True, False])
    return Outputs(data_1=DataSource.write_df(ranked), data_2=None, data_3=None)
# Optional post-processing hook. It receives the main function's outputs and may
# transform them or return a friendlier format. Its result is not cached.
def m24_post_run_bigquant_run(outputs):
    """Return ``outputs`` unchanged (no post-processing is applied)."""
    return outputs
# Backtest engine: initialization hook, executed only once.
def m19_initialize_bigquant_run(context):
    """Load the predictions and set the trading parameters on ``context``.

    Sets ``context.ranker_prediction``, ``context.stock_weights``,
    ``context.max_cash_per_instrument`` and ``context.options['hold_days']``,
    and overrides the commission model.

    Args:
        context: Backtest context object; ``context.options['data']`` must
            expose ``read_df()``.
    """
    # The prediction data is passed in through options; read_df loads it into
    # memory as a DataFrame.
    context.ranker_prediction = context.options['data'].read_df()
    # The system already sets default commission and slippage; this call
    # overrides the commission.
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
    # Number of top-ranked stocks to buy on each trading day.
    stock_count = 15
    # Per-rank cash weights: 1 / ln(rank + 2) decreases with rank, so
    # higher-ranked stocks get more cash.
    # T.norm presumably rescales the list to sum to 1 -- confirm.
    context.stock_weights = T.norm([1 / math.log(rank + 2) for rank in range(stock_count)])
    # Maximum fraction of portfolio value that a single stock may take up.
    context.max_cash_per_instrument = 0.9
    context.options['hold_days'] = 5
# Backtest engine: per-bar data handler, executed once per trading day.
def m19_handle_data_bigquant_run(context, data):
    """Place the day's sell and buy orders from the ranked predictions.

    Steps:
        1. Cash allocation. The average holding period is ``hold_days``, so
           about ``1 / hold_days`` of the portfolio value is meant to be used
           each day. During the first ``hold_days`` days exactly that share is
           used; afterwards up to 1.5 times that share may be used.
        2. Sell orders (only after the build-up period): held stocks are
           liquidated starting from the lowest-ranked one until the cash
           needed for the day is covered.
        3. Buy orders: the top-ranked stocks, one per entry of
           ``context.stock_weights``, are bought with rank-weighted cash,
           capped per stock.

    Args:
        context: Backtest context carrying ``ranker_prediction``, ``options``,
            ``stock_weights``, ``max_cash_per_instrument``, the portfolio and
            the order API.
        data: Bar data; only ``data.current_dt`` is read.
    """
    # Keep only today's predictions.
    today = data.current_dt.strftime('%Y-%m-%d')
    predictions = context.ranker_prediction
    todays_ranking = predictions[predictions.date == today]

    # 1. Cash allocation.
    hold_days = context.options['hold_days']
    is_staging = context.trading_day_index < hold_days  # still in the build-up period?
    cash_avg = context.portfolio.portfolio_value / hold_days
    cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)
    cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)
    held = context.perf_tracker.position_tracker.positions
    # Value of each held position, keyed by symbol.
    positions = {equity.symbol: pos.amount * pos.last_sale_price
                 for equity, pos in held.items()}

    # 2. Sell orders: walk the ranking from the bottom up.
    if not is_staging and cash_for_sell > 0:
        equities = {equity.symbol: equity for equity, _ in held.items()}
        # A plain list filter instead of Series.apply, so a day without
        # predictions simply yields nothing to sell.
        sellable = [
            symbol for symbol in todays_ranking.instrument
            if symbol in equities
            and not context.has_unfinished_sell_order(equities[symbol])
        ]
        for symbol in reversed(sellable):
            context.order_target(context.symbol(symbol), 0)
            cash_for_sell -= positions[symbol]
            if cash_for_sell <= 0:
                break

    # 3. Buy orders: top-ranked stocks, one per configured weight.
    weights = context.stock_weights
    buy_instruments = list(todays_ranking.instrument.iloc[:len(weights)])
    max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument
    for symbol, weight in zip(buy_instruments, weights):
        # Never let a position grow beyond the per-stock cash ceiling.
        room = max_cash_per_instrument - positions.get(symbol, 0)
        cash = min(cash_for_buy * weight, room)
        if cash > 0:
            context.order_value(context.symbol(symbol), cash)
# Backtest engine: data preparation hook, executed only once.
def m19_prepare_bigquant_run(context):
    """Do nothing; no extra data preparation is done in this hook."""
    pass
# m1: instrument pool for the training period (2020-01-01 to 2021-06-30).
# The pool is an explicit list of 50 instrument codes; m1.data feeds the
# labeler (m2) and the training feature extractor (m15).
m1 = M.instruments.v2(
start_date='2020-01-01',
end_date='2021-06-30',
market='CN_STOCK_A',
instrument_list="""300014.SZA
002202.SZA
600236.SHA
600884.SHA
300037.SZA
002266.SZA
300073.SZA
600732.SHA
002340.SZA
002129.SZA
601908.SHA
300316.SZA
601012.SHA
603806.SHA
601985.SHA
603659.SHA
300724.SZA
300751.SZA
300763.SZA
688005.SHA
003816.SZA
688116.SHA
688599.SHA
300919.SZA
600032.SHA
600905.SHA
600900.SHA
600438.SHA
600674.SHA
002459.SZA
002506.SZA
300070.SZA
002074.SZA
600886.SHA
000883.SZA
300274.SZA
002709.SZA
300450.SZA
603568.SHA
603026.SHA
002812.SZA
603218.SHA
600025.SHA
300750.SZA
603185.SHA
601615.SHA
601865.SHA
688408.SHA
688390.SHA
003035.SZA
""",
max_count=0
)
# m2: label the training instruments. The label expression (a runtime string,
# evaluated line by line) computes the return from the next day's open to the
# close five days ahead, clips it to the 1% / 99% quantiles, and sets the label
# to NaN when the next day's high equals its low; rows with NaN labels are
# dropped (drop_na_label=True) and labels stay continuous (cast_label_int=False).
m2 = M.advanced_auto_labeler.v2(
instruments=m1.data,
label_expr="""# #号开始的表示注释
# 0. 每行一个,顺序执行,从第二个开始,可以使用label字段
# 1. 可用数据字段见 https://bigquant.com/docs/data_history_data.html
# 添加benchmark_前缀,可使用对应的benchmark数据
# 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/big_expr.html>`_
# 计算收益:5日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
shift(close, -5) / shift(open, -1)-1
# 极值处理:用1%和99%分位的值做clip
clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
where(shift(high, -1) == shift(low, -1), NaN, label)
""",
start_date='',
end_date='',
benchmark='h00977.CSI',
drop_na_label=True,
cast_label_int=False
)
# m13: standardize the 'label' column of the labeled data.
# NOTE(review): the exact standardization method is defined by the platform
# module and is not visible here.
m13 = M.standardlize.v8(
input_1=m2.data,
columns_input='label'
)
# m3: the model's feature list, one expression per line (11 in total).
# It is shared by the training and prediction pipelines, and its length is what
# m4_run_bigquant_run / m8_run_bigquant_run use as feature_len.
m3 = M.input_features.v1(
features="""close_0
open_0
high_0
low_0
fs_net_income_0/market_cap_0
fs_roe_0
fs_roa_0
market_cap_0
ta_ma(close_0, timeperiod=5)
ta_ma(close_0, timeperiod=10)
ta_ma(close_0, timeperiod=15)"""
)
# Training feature pipeline: m15 -> m16 -> m14.
# m15: extract the base features for the training instruments, with
# before_start_days=30 (presumably extra history before the start date -- confirm).
m15 = M.general_feature_extractor.v7(
instruments=m1.data,
features=m3.data,
start_date='',
end_date='',
before_start_days=30
)
# m16: compute the derived feature expressions from m3 on top of m15's output
# (drop_na=True; extra columns are kept because remove_extra_columns=False).
m16 = M.derived_feature_extractor.v3(
input_data=m15.data,
features=m3.data,
date_col='date',
instrument_col='instrument',
drop_na=True,
remove_extra_columns=False
)
# m14: standardize the training features; the columns come from the m3 feature
# list passed as input_2 (columns_input is left as '[]').
m14 = M.standardlize.v8(
input_1=m16.data,
input_2=m3.data,
columns_input='[]'
)
# m7: inner-join the standardized labels (m13) with the standardized features
# (m14) on (date, instrument).
m7 = M.join.v3(
data1=m13.data,
data2=m14.data,
on='date,instrument',
how='inner',
sort=False
)
# m26: convert the joined training data into model input with a window of 5
# along each instrument, feature values clipped at 5, and flatten=True.
m26 = M.dl_convert_to_bin.v2(
input_data=m7.data,
features=m3.data,
window_size=5,
feature_clip=5,
flatten=True,
window_along_col='instrument'
)
# m4: run m4_run_bigquant_run on the converted training data to reshape the
# flattened 'x' array into 3-D before training.
m4 = M.cached.v3(
input_1=m26.data,
input_2=m3.data,
run=m4_run_bigquant_run,
post_run=m4_post_run_bigquant_run,
input_ports='',
params='{}',
output_ports=''
)
# m9: instrument pool for the prediction / backtest period. Both dates go
# through T.live_run_param('trading_date', ...); the second argument
# ('2021-07-01' / '2021-12-31') is presumably the fallback used outside live
# runs -- confirm. The instrument list is the same 50 codes used for training.
m9 = M.instruments.v2(
start_date=T.live_run_param('trading_date', '2021-07-01'),
end_date=T.live_run_param('trading_date', '2021-12-31'),
market='CN_STOCK_A',
instrument_list="""300014.SZA
002202.SZA
600236.SHA
600884.SHA
300037.SZA
002266.SZA
300073.SZA
600732.SHA
002340.SZA
002129.SZA
601908.SHA
300316.SZA
601012.SHA
603806.SHA
601985.SHA
603659.SHA
300724.SZA
300751.SZA
300763.SZA
688005.SHA
003816.SZA
688116.SHA
688599.SHA
300919.SZA
600032.SHA
600905.SHA
600900.SHA
600438.SHA
600674.SHA
002459.SZA
002506.SZA
300070.SZA
002074.SZA
600886.SHA
000883.SZA
300274.SZA
002709.SZA
300450.SZA
603568.SHA
603026.SHA
002812.SZA
603218.SHA
600025.SHA
300750.SZA
603185.SHA
601615.SHA
601865.SHA
688408.SHA
688390.SHA
003035.SZA
""",
max_count=0
)
# Prediction feature pipeline: m17 -> m18 -> m25 -> m27 -> m8. It mirrors the
# training pipeline but has no labels and no join.
# m17: extract the base features for the prediction-period instruments.
m17 = M.general_feature_extractor.v7(
instruments=m9.data,
features=m3.data,
start_date='',
end_date='',
before_start_days=30
)
# m18: compute the derived feature expressions; its output is also the source
# of the (date, instrument) rows used by m24.
m18 = M.derived_feature_extractor.v3(
input_data=m17.data,
features=m3.data,
date_col='date',
instrument_col='instrument',
drop_na=True,
remove_extra_columns=False
)
# m25: standardize the prediction features (columns from the m3 feature list).
m25 = M.standardlize.v8(
input_1=m18.data,
input_2=m3.data,
columns_input='[]'
)
# m27: convert to model input with the same window / clip / flatten settings
# as the training side (m26).
m27 = M.dl_convert_to_bin.v2(
input_data=m25.data,
features=m3.data,
window_size=5,
feature_clip=5,
flatten=True,
window_along_col='instrument'
)
# m8: run m8_run_bigquant_run to reshape the flattened 'x' array into 3-D
# before prediction.
m8 = M.cached.v3(
input_1=m27.data,
input_2=m3.data,
run=m8_run_bigquant_run,
post_run=m8_post_run_bigquant_run,
input_ports='',
params='{}',
output_ports=''
)
# Network definition: input -> LSTM(32) -> dropout(0.2) -> dense(30, tanh)
# -> dropout(0.2) -> dense(1, tanh).
# m6: input layer with per-sample shape '11,5', matching the
# (feature_len, columns // feature_len) layout produced by the reshape functions
# for 11 features and window_size=5.
m6 = M.dl_layer_input.v1(
shape='11,5',
batch_shape='',
dtype='float32',
sparse=False,
name=''
)
# m10: LSTM layer with 32 units; return_sequences=False, no dropout and no
# regularizers configured.
m10 = M.dl_layer_lstm.v1(
inputs=m6.data,
units=32,
activation='tanh',
recurrent_activation='hard_sigmoid',
use_bias=True,
kernel_initializer='glorot_uniform',
recurrent_initializer='Orthogonal',
bias_initializer='Zeros',
unit_forget_bias=True,
kernel_regularizer='None',
kernel_regularizer_l1=0,
kernel_regularizer_l2=0,
recurrent_regularizer='None',
recurrent_regularizer_l1=0,
recurrent_regularizer_l2=0,
bias_regularizer='None',
bias_regularizer_l1=0,
bias_regularizer_l2=0,
activity_regularizer='None',
activity_regularizer_l1=0,
activity_regularizer_l2=0,
kernel_constraint='None',
recurrent_constraint='None',
bias_constraint='None',
dropout=0,
recurrent_dropout=0,
return_sequences=False,
implementation='0',
name=''
)
# m12: dropout with rate 0.2 on the LSTM output.
m12 = M.dl_layer_dropout.v1(
inputs=m10.data,
rate=0.2,
noise_shape='',
name=''
)
# m20: dense hidden layer, 30 units, tanh activation.
m20 = M.dl_layer_dense.v1(
inputs=m12.data,
units=30,
activation='tanh',
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='Zeros',
kernel_regularizer='None',
kernel_regularizer_l1=0,
kernel_regularizer_l2=0,
bias_regularizer='None',
bias_regularizer_l1=0,
bias_regularizer_l2=0,
activity_regularizer='None',
activity_regularizer_l1=0,
activity_regularizer_l2=0,
kernel_constraint='None',
bias_constraint='None',
name=''
)
# m21: dropout with rate 0.2 on the hidden layer output.
m21 = M.dl_layer_dropout.v1(
inputs=m20.data,
rate=0.2,
noise_shape='',
name=''
)
# m22: output layer, a single unit with tanh activation.
# NOTE(review): tanh bounds the prediction to (-1, 1) while the target is a
# standardized label -- confirm this is intended for a regression output.
m22 = M.dl_layer_dense.v1(
inputs=m21.data,
units=1,
activation='tanh',
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='Zeros',
kernel_regularizer='None',
kernel_regularizer_l1=0,
kernel_regularizer_l2=0,
bias_regularizer='None',
bias_regularizer_l1=0,
bias_regularizer_l2=0,
activity_regularizer='None',
activity_regularizer_l1=0,
activity_regularizer_l2=0,
kernel_constraint='None',
bias_constraint='None',
name=''
)
# m34: assemble the model from the input layer (m6) to the output layer (m22).
m34 = M.dl_model_init.v1(
inputs=m6.data,
outputs=m22.data
)
# m5: train the model on the reshaped training data (m4.data_1) with RMSprop,
# mean-squared-error loss and an MAE metric, batch size 256, for 5 epochs, with
# n_gpus=0. custom_objects is the (empty) custom-layer registry.
m5 = M.dl_model_train.v1(
input_model=m34.data,
training_data=m4.data_1,
optimizer='RMSprop',
loss='mean_squared_error',
metrics='mae',
batch_size=256,
epochs=5,
custom_objects=m5_custom_objects_bigquant_run,
n_gpus=0,
verbose='2:每个epoch输出一行记录'
)
# m11: run the trained model on the reshaped prediction data (m8.data_1).
m11 = M.dl_model_predict.v1(
trained_model=m5.data,
input_data=m8.data_1,
batch_size=1024,
n_gpus=0,
verbose='2:每个epoch输出一行记录'
)
# m24: run m24_run_bigquant_run to combine the predictions (m11) with the
# date / instrument columns of the derived-feature table (m18) and sort them
# into a per-day ranking.
m24 = M.cached.v3(
input_1=m11.data,
input_2=m18.data,
run=m24_run_bigquant_run,
post_run=m24_post_run_bigquant_run,
input_ports='',
params='{}',
output_ports=''
)
# m19: backtest the strategy on the m9 instrument pool. The ranked predictions
# from m24 are handed to the callbacks through options_data; buys are priced at
# the open and sells at the close, with 1,000,000 starting capital on daily data.
# NOTE(review): the captured run log ends with
# "TypeError: object of type 'NoneType' has no len()" raised inside
# backtest.v8 / trade.v4. The cause is not visible in this file; check that the
# benchmark 'h00977.CSI' has data for the backtest range and that the date
# range resolves as expected.
m19 = M.trade.v4(
instruments=m9.data,
options_data=m24.data_1,
start_date='',
end_date='',
initialize=m19_initialize_bigquant_run,
handle_data=m19_handle_data_bigquant_run,
prepare=m19_prepare_bigquant_run,
volume_limit=0.025,
order_price_field_buy='open',
order_price_field_sell='close',
capital_base=1000000,
auto_cancel_non_tradable_orders=True,
data_frequency='daily',
price_type='后复权',
product_type='股票',
plot_charts=True,
backtest_only=False,
benchmark='h00977.CSI'
)
[2022-03-14 11:03:25.262642] INFO: moduleinvoker: instruments.v2 开始运行..
[2022-03-14 11:03:25.451377] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:25.453157] INFO: moduleinvoker: instruments.v2 运行完成[0.190523s].
[2022-03-14 11:03:25.464826] INFO: moduleinvoker: advanced_auto_labeler.v2 开始运行..
[2022-03-14 11:03:25.856045] INFO: 自动标注(股票): 加载历史数据: 16423 行
[2022-03-14 11:03:25.857599] INFO: 自动标注(股票): 开始标注 ..
[2022-03-14 11:03:25.966275] INFO: moduleinvoker: advanced_auto_labeler.v2 运行完成[0.501449s].
[2022-03-14 11:03:25.984900] INFO: moduleinvoker: standardlize.v8 开始运行..
[2022-03-14 11:03:26.406770] INFO: moduleinvoker: standardlize.v8 运行完成[0.421871s].
[2022-03-14 11:03:26.412723] INFO: moduleinvoker: input_features.v1 开始运行..
[2022-03-14 11:03:26.421273] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:26.422943] INFO: moduleinvoker: input_features.v1 运行完成[0.010228s].
[2022-03-14 11:03:26.450118] INFO: moduleinvoker: general_feature_extractor.v7 开始运行..
[2022-03-14 11:03:26.476951] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:26.479270] INFO: moduleinvoker: general_feature_extractor.v7 运行完成[0.029198s].
[2022-03-14 11:03:26.489339] INFO: moduleinvoker: derived_feature_extractor.v3 开始运行..
[2022-03-14 11:03:26.501561] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:26.503365] INFO: moduleinvoker: derived_feature_extractor.v3 运行完成[0.014029s].
[2022-03-14 11:03:26.509711] INFO: moduleinvoker: standardlize.v8 开始运行..
[2022-03-14 11:03:26.519663] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:26.521381] INFO: moduleinvoker: standardlize.v8 运行完成[0.011675s].
[2022-03-14 11:03:26.531463] INFO: moduleinvoker: join.v3 开始运行..
[2022-03-14 11:03:26.748747] INFO: join: /data, 行数=15096/15678, 耗时=0.091136s
[2022-03-14 11:03:26.786155] INFO: join: 最终行数: 15096
[2022-03-14 11:03:26.795041] INFO: moduleinvoker: join.v3 运行完成[0.26358s].
[2022-03-14 11:03:26.812681] INFO: moduleinvoker: dl_convert_to_bin.v2 开始运行..
[2022-03-14 11:03:27.761705] INFO: moduleinvoker: dl_convert_to_bin.v2 运行完成[0.949036s].
[2022-03-14 11:03:27.792746] INFO: moduleinvoker: cached.v3 开始运行..
[2022-03-14 11:03:27.871438] INFO: moduleinvoker: cached.v3 运行完成[0.078702s].
[2022-03-14 11:03:27.880540] INFO: moduleinvoker: instruments.v2 开始运行..
[2022-03-14 11:03:27.899268] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:27.901569] INFO: moduleinvoker: instruments.v2 运行完成[0.021047s].
[2022-03-14 11:03:27.915857] INFO: moduleinvoker: general_feature_extractor.v7 开始运行..
[2022-03-14 11:03:27.926123] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:27.928479] INFO: moduleinvoker: general_feature_extractor.v7 运行完成[0.012647s].
[2022-03-14 11:03:27.937977] INFO: moduleinvoker: derived_feature_extractor.v3 开始运行..
[2022-03-14 11:03:27.950663] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:27.952470] INFO: moduleinvoker: derived_feature_extractor.v3 运行完成[0.014499s].
[2022-03-14 11:03:27.958633] INFO: moduleinvoker: standardlize.v8 开始运行..
[2022-03-14 11:03:27.970557] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:27.972802] INFO: moduleinvoker: standardlize.v8 运行完成[0.01416s].
[2022-03-14 11:03:27.994889] INFO: moduleinvoker: dl_convert_to_bin.v2 开始运行..
[2022-03-14 11:03:28.008396] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:28.010849] INFO: moduleinvoker: dl_convert_to_bin.v2 运行完成[0.015949s].
[2022-03-14 11:03:28.026139] INFO: moduleinvoker: cached.v3 开始运行..
[2022-03-14 11:03:28.037250] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:28.039821] INFO: moduleinvoker: cached.v3 运行完成[0.013685s].
[2022-03-14 11:03:28.054149] INFO: moduleinvoker: dl_layer_input.v1 运行完成[0.005858s].
[2022-03-14 11:03:28.219629] INFO: moduleinvoker: dl_layer_lstm.v1 运行完成[0.157543s].
[2022-03-14 11:03:28.231504] INFO: moduleinvoker: dl_layer_dropout.v1 运行完成[0.003938s].
[2022-03-14 11:03:28.249336] INFO: moduleinvoker: dl_layer_dense.v1 运行完成[0.010931s].
[2022-03-14 11:03:28.259860] INFO: moduleinvoker: dl_layer_dropout.v1 运行完成[0.003818s].
[2022-03-14 11:03:28.286297] INFO: moduleinvoker: dl_layer_dense.v1 运行完成[0.01121s].
[2022-03-14 11:03:28.324119] INFO: moduleinvoker: cached.v3 开始运行..
[2022-03-14 11:03:28.333177] INFO: moduleinvoker: 命中缓存
[2022-03-14 11:03:28.334829] INFO: moduleinvoker: cached.v3 运行完成[0.010732s].
[2022-03-14 11:03:28.336490] INFO: moduleinvoker: dl_model_init.v1 运行完成[0.041985s].
[2022-03-14 11:03:28.341041] INFO: moduleinvoker: dl_model_train.v1 开始运行..
[2022-03-14 11:03:28.630747] INFO: dl_model_train: 准备训练,训练样本个数:15096,迭代次数:5
[2022-03-14 11:04:05.779751] INFO: dl_model_train: 训练结束,耗时:37.15s
[2022-03-14 11:04:05.812724] INFO: moduleinvoker: dl_model_train.v1 运行完成[37.471674s].
[2022-03-14 11:04:05.817794] INFO: moduleinvoker: dl_model_predict.v1 开始运行..
[2022-03-14 11:04:06.822954] INFO: moduleinvoker: dl_model_predict.v1 运行完成[1.005148s].
[2022-03-14 11:04:06.882239] INFO: moduleinvoker: cached.v3 开始运行..
[2022-03-14 11:04:06.992357] INFO: moduleinvoker: cached.v3 运行完成[0.110127s].
[2022-03-14 11:04:07.042552] INFO: moduleinvoker: backtest.v8 开始运行..
[2022-03-14 11:04:07.050669] INFO: backtest: biglearning backtest:V8.6.2
[2022-03-14 11:04:07.053069] INFO: backtest: product_type:stock by specified
[2022-03-14 11:04:13.782168] ERROR: moduleinvoker: module name: backtest, module version: v8, trackeback: TypeError: object of type 'NoneType' has no len()
[2022-03-14 11:04:13.788271] ERROR: moduleinvoker: module name: trade, module version: v4, trackeback: TypeError: object of type 'NoneType' has no len()