In [2]:
from zipline.api import get_open_orders
from zipline.api import symbol
from zipline.api import cancel_order
import warnings

# 忽略所有警告
warnings.filterwarnings("ignore")

# 在这里写下可能会产生警告的代码
In [20]:
df = m4.data.read()
df['label'].value_counts().sum()
Out[20]:
15924
In [11]:
m9.data_1.read()
Out[11]:
date instrument (m_max(high_0, 5) / m_min(close_0, 5)) m_corr(high_0, close_0, 10) (avg_amount_5 / avg_amount_10) (rank_swing_volatility_5 / rank_swing_volatility_30) avg_main_rate_5 netflow_xl_rate_0 main_rate_0 ta_wma_10 ... ta_ema_5 (money_netflow_0 / avg_money_netflow_5) (return_5 / return_10) (rank_amount_0 / rank_avg_amount_5) (rank_avg_amount_5 / rank_avg_amount_10) (rank_return_0 / rank_return_5) (turn_0 / avg_turn_3) (turn_0 / avg_turn_5) rank_avg_turn_5 label
0 2016-01-04 600463.SH 1.258578 0.897843 1.071424 0.152034 0.065361 -0.161099 -0.339787 51.739909 ... 52.279970 4.752897 0.953515 1.656737 1.182038 0.999072 1.666155 2.183054 0.763571 19
1 2016-01-05 000418.SZ 1.159570 0.894691 1.406773 0.910592 0.041859 -0.018792 -0.071819 66.291152 ... 65.053525 3.059583 0.937658 1.199462 1.438685 0.998009 1.398550 1.762189 0.526071 3
2 2016-01-05 000838.SZ 1.205319 0.830155 0.967191 0.968433 0.081405 -0.067630 -0.060253 154.095907 ... 151.200870 0.301290 0.996385 1.008753 1.001686 1.010986 1.371773 1.552949 0.990714 19
3 2016-01-05 600463.SH 1.346814 0.951719 1.151025 0.956380 0.044766 -0.195049 -0.389352 53.959879 ... 53.555152 1.841029 0.952760 0.844161 1.242893 1.002206 0.630511 0.857153 0.792143 6
4 2016-01-05 600677.SH 1.193816 0.977033 1.577141 1.050501 0.046061 -0.035317 -0.064841 143.153420 ... 142.623885 0.894452 1.037900 1.041762 1.081964 0.991140 1.615136 2.395483 0.948214 9
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
66556 2022-12-28 601778.SH 1.166667 0.960884 1.377622 1.266684 0.046044 0.000687 0.069954 4.977453 ... 4.896507 4.399394 1.035052 1.100719 1.071506 1.016212 2.179818 3.021692 0.784936 12
66557 2022-12-28 601969.SH 1.138973 0.900169 0.957019 1.302799 0.050182 -0.008470 -0.074989 7.027218 ... 6.931183 1.177856 1.046243 1.210541 1.003863 1.034449 2.389996 3.100826 0.439841 6
66558 2022-12-28 603209.SH 1.221372 0.964005 1.349856 1.248416 0.056133 0.000000 0.023853 33.833164 ... 33.573371 1.716466 1.045042 1.403013 1.285821 0.997634 1.810868 2.314375 0.862636 5
66559 2022-12-28 603255.SH 1.316930 0.920387 0.845352 1.008064 0.062241 -0.582186 -0.918122 36.059273 ... 37.458925 3.619291 1.071255 1.045359 0.960245 0.995641 0.972489 0.873109 0.948662 13
66560 2022-12-28 603527.SH 1.136578 0.931601 1.289250 1.474447 0.048505 -0.037222 -0.116990 28.258543 ... 27.771580 6.531149 1.039641 1.584568 1.352074 1.039712 1.871490 2.260631 0.589693 5

63046 rows × 23 columns

    {"description":"实验创建于2023/2/10","graph":{"edges":[{"to_node_id":"-106:sql1","from_node_id":"-115:data"},{"to_node_id":"-113:sql2","from_node_id":"-118:data"},{"to_node_id":"-121:sql1","from_node_id":"-118:data"},{"to_node_id":"-99:input_1","from_node_id":"-110:predictions"},{"to_node_id":"-79:input_1","from_node_id":"-127:data"},{"to_node_id":"-110:model","from_node_id":"-75:model"},{"to_node_id":"-110:data","from_node_id":"-88:data_1"},{"to_node_id":"-75:data","from_node_id":"-79:data_1"},{"to_node_id":"-88:input_1","from_node_id":"-96:data"},{"to_node_id":"-114:options_data","from_node_id":"-99:data_1"},{"to_node_id":"-114:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62:data"},{"to_node_id":"-127:sql","from_node_id":"-113:data"},{"to_node_id":"-170:sql1","from_node_id":"-121:data"},{"to_node_id":"-121:sql2","from_node_id":"-124:data"},{"to_node_id":"-96:sql","from_node_id":"-170:data"},{"to_node_id":"-170:sql2","from_node_id":"-128:data"},{"to_node_id":"-106:sql2","from_node_id":"-130:data"},{"to_node_id":"-113:sql1","from_node_id":"-106:data"}],"nodes":[{"node_id":"-115","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"/*\n使用DAI SQL为量化模型预测生成标签数据。标签反映了未来5日的收益率,并且被离散化为20个桶,每个桶代表一个收益率范围。这样,我们就可以训练模型来预测未来的收益率范围,而不仅仅是具体的收益率值。\n\n1. 首先定义了一个名为label_data的临时表,用于计算和存储未来5日收益率,其1%和99%分位数,以及离散化后的收益率(被分为20个桶,每个桶代表一个收益率范围)。\n2. 对未来5日收益率进行了截断处理,只保留在1%和99%分位数之间的值。\n3. 选择了标签值不为空,并且非涨跌停(未来一天的最高价不等于最低价)的数据\n4. 从这个临时表中选择了日期、股票代码和标签字段,以供进模型训练。\n*/\n\nSELECT\n -- 计算的是未来5日的收益率。这是通过将5天后的收盘价除以第二天的开盘价得到的。这里使用的是一个叫做m_lead的函数,它可以获取某个字段在未来某天的值。\n -- _future_return 是一个中间变量名,以 _ 开始的别名列不会在最终结果返回\n m_lead(close, 2) / m_lead(open, 1) AS future_return,\n -- 日期,这是每个股票每天的数据\n date,\n -- 股票代码,代表每一支股票\n instrument\n-- 从cn_stock_bar1d这个表中选择数据,这个表存储的是股票的日线数据\nFROM cn_stock_bar1d\n-- QUALIFY 用于数据过滤 \nQUALIFY\n COLUMNS(*) IS NOT NULL\n -- 标签值不为空,且非涨跌停(未来一天的最高价不等于最低价)\n AND m_lead(high, 1) != m_lead(low, 1)\nORDER BY date, instrument","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-115"}],"cacheable":true,"seq_num":1,"comment":"1. 设置预测目标,例如:根据未来5日收益率预测","comment_collapsed":false,"x":-887.5857238769531,"y":-729.8284912109375},{"node_id":"-118","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考\n-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B\nSELECT\n*\nFROM(\nSELECT\n -- 在这里输入因子表达式\n -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0\n -- 数据&字段: 数据文档 https://bigquant.com/data/home\n -- 在时间截面的total_market_cap排名\n -- _return_0 是中间变量,以下划线开始的变量是中间值,不会出现在最后的因子里\n m_max(high_0, 5)/m_min(close_0, 5),\n m_CORR(high_0, close_0, 10),\n avg_amount_5/avg_amount_10,\n rank_swing_volatility_5/rank_swing_volatility_30,\n \tvolatility_5\n avg_main_rate_5,\n netflow_xl_rate_0,\n main_rate_0,\n ta_wma_10,\n ta_wma_5,\n \tta_rsi_14,\n amount_0/avg_amount_5,\n ta_ema_5,\n money_netflow_0/avg_money_netflow_5,\n return_5/return_10,\n rank_amount_0/rank_avg_amount_5,\n rank_avg_amount_5/rank_avg_amount_10,\n rank_return_0/rank_return_5,\n turn_0/avg_turn_3,\n turn_0/avg_turn_5,\n\trank_avg_turn_5,\n -- 日期和股票代码\n date,\n instrument\n-- 预计算因子和数据 cn_stock_factors https://bigquant.com/data/datasources/cn_stock_factors\nFROM cn_stock_factors\nWHERE \n -- 剔除ST股票\n st_status = 0\n -- 非停牌股\n AND suspended = 0\n -- 不属于北交所\n AND list_sector < 4\n-- 窗口函数内容过滤需要放在 QUALIFY 这里\nQUALIFY\n -- 去掉有空值的行\n COLUMNS(*) IS NOT NULL\n-- 按日期、股票代码排序\nORDER BY date, instrument)\nAS SubQuery\nORDER BY date, instrument","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-118"}],"cacheable":true,"seq_num":2,"comment":"2. 编写因子","comment_collapsed":false,"x":-218.2720184326172,"y":-623.1422119140625},{"node_id":"-110","module_id":"BigQuantSpace.stock_ranker_dai_predict.stock_ranker_dai_predict-v3","parameters":[],"input_ports":[{"name":"model","node_id":"-110"},{"name":"data","node_id":"-110"}],"output_ports":[{"name":"predictions","node_id":"-110"}],"cacheable":true,"seq_num":14,"comment":"7. 预测","comment_collapsed":false,"x":-258.94126892089844,"y":219.88714599609375},{"node_id":"-127","module_id":"BigQuantSpace.extract_data_dai.extract_data_dai-v7","parameters":[{"name":"start_date","value":"2016-01-01","type":"Literal","bound_global_parameter":null},{"name":"start_date_bound_to_trading_date","value":"False","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2022-12-31","type":"Literal","bound_global_parameter":null},{"name":"end_date_bound_to_trading_date","value":"False","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":90,"type":"Literal","bound_global_parameter":null},{"name":"debug","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql","node_id":"-127"}],"output_ports":[{"name":"data","node_id":"-127"}],"cacheable":true,"seq_num":4,"comment":"4. 训练数据,设置训练开始时间和结束时间","comment_collapsed":false,"x":-500.14678955078125,"y":-189.39261627197266},{"node_id":"-75","module_id":"BigQuantSpace.stock_ranker_dai_train.stock_ranker_dai_train-v2","parameters":[{"name":"learning_algorithm","value":"排序","type":"Literal","bound_global_parameter":null},{"name":"number_of_leaves","value":"30","type":"Literal","bound_global_parameter":null},{"name":"min_docs_per_leaf","value":"1000","type":"Literal","bound_global_parameter":null},{"name":"number_of_trees","value":"20","type":"Literal","bound_global_parameter":null},{"name":"learning_rate","value":"0.1","type":"Literal","bound_global_parameter":null},{"name":"max_bins","value":"1023","type":"Literal","bound_global_parameter":null},{"name":"feature_fraction","value":"1","type":"Literal","bound_global_parameter":null},{"name":"data_row_fraction","value":"1","type":"Literal","bound_global_parameter":null},{"name":"plot_charts","value":"True","type":"Literal","bound_global_parameter":null},{"name":"ndcg_discount_base","value":"1","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"data","node_id":"-75"},{"name":"validation_ds","node_id":"-75"},{"name":"base_model","node_id":"-75"}],"output_ports":[{"name":"model","node_id":"-75"}],"cacheable":true,"seq_num":5,"comment":"5. 使用StockRanker算法训练","comment_collapsed":false,"x":-357.7325744628906,"y":94.28900909423828},{"node_id":"-88","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read()\n df.drop(columns=['avg_close_2','close', 'low','open','high'], inplace=True)\n# df = df[~df.instrument.astype(str).str.startswith('688') & ~df.instrument.astype(str).str.startswith('300')]\n df = df[~df.instrument.astype(str).str.startswith('688')]\n data_1 = DataSource.write_df(df)\n \n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-88"},{"name":"input_2","node_id":"-88"},{"name":"input_3","node_id":"-88"}],"output_ports":[{"name":"data_1","node_id":"-88"},{"name":"data_2","node_id":"-88"},{"name":"data_3","node_id":"-88"}],"cacheable":true,"seq_num":7,"comment":"","comment_collapsed":true,"x":7.309146404266357,"y":20.64450454711914},{"node_id":"-79","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read()\n # 将'label'列放到第一个位置\n# df = df.reindex(columns=['label'] + [col for col in df.columns if col != 'label'])\n df.drop(columns=['avg_close_2','close', 'low','future_return','open','high'], inplace=True)\n# df = df[~df.instrument.astype(str).str.startswith('688') & ~df.instrument.astype(str).str.startswith('300')]\n df = df[~df.instrument.astype(str).str.startswith('688')]\n data_1 = DataSource.write_df(df)\n \n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-79"},{"name":"input_2","node_id":"-79"},{"name":"input_3","node_id":"-79"}],"output_ports":[{"name":"data_1","node_id":"-79"},{"name":"data_2","node_id":"-79"},{"name":"data_3","node_id":"-79"}],"cacheable":true,"seq_num":9,"comment":"","comment_collapsed":true,"x":-481.7403564453125,"y":-20.752729415893555},{"node_id":"-96","module_id":"BigQuantSpace.extract_data_dai.extract_data_dai-v7","parameters":[{"name":"start_date","value":"2023-01-01","type":"Literal","bound_global_parameter":null},{"name":"start_date_bound_to_trading_date","value":"True","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2023-12-17","type":"Literal","bound_global_parameter":null},{"name":"end_date_bound_to_trading_date","value":"True","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":90,"type":"Literal","bound_global_parameter":null},{"name":"debug","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql","node_id":"-96"}],"output_ports":[{"name":"data","node_id":"-96"}],"cacheable":true,"seq_num":10,"comment":"6. 预测数据,设置预测时间,开启模拟交易时绑定交易日期","comment_collapsed":true,"x":34.88255310058594,"y":-87.28439712524414},{"node_id":"-99","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read()\n df['instrument'] = df['instrument'].str.replace('.SZ', '.SZA')\n df['instrument'] = df['instrument'].str.replace('.SH', '.SHA')\n data_1 = DataSource.write_df(df)\n \n return Outputs(data_1=data_1, data_2=None, data_3=None)","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-99"},{"name":"input_2","node_id":"-99"},{"name":"input_3","node_id":"-99"}],"output_ports":[{"name":"data_1","node_id":"-99"},{"name":"data_2","node_id":"-99"},{"name":"data_3","node_id":"-99"}],"cacheable":true,"seq_num":8,"comment":"","comment_collapsed":true,"x":-249,"y":339},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2023-01-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2023-12-17","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62"}],"cacheable":true,"seq_num":11,"comment":"预测数据,用于回测和模拟","comment_collapsed":true,"x":-30,"y":-715},{"node_id":"-114","module_id":"BigQuantSpace.trade.trade-v4","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"initialize","value":"# 回测引擎:初始化函数,只执行一次\ndef bigquant_run(context):\n # 加载预测数据\n context.ranker_prediction = context.options['data'].read_df()\n\n # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数\n context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))\n # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)\n # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只\n stock_count = 1\n # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]\n context.stock_weights = [1]\n # 设置每只股票占用的最大资金比例\n context.max_cash_per_instrument = 1\n context.options['hold_days'] = 1\n","type":"Literal","bound_global_parameter":null},{"name":"handle_data","value":"# 回测引擎:每日数据处理函数,每天执行一次\ndef bigquant_run(context, data):\n # 按日期过滤得到今日的预测数据\n ranker_prediction = context.ranker_prediction[\n context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]\n cash_for_buy = min(context.portfolio.portfolio_value/1,context.portfolio.cash)\n buy_instruments = list(ranker_prediction.instrument)\n sell_instruments = [instrument.symbol for instrument in context.portfolio.positions.keys()]\n to_buy = set(buy_instruments[:1]) - set(sell_instruments) \n to_sell = set(sell_instruments) - set(buy_instruments[:1])\n \n for instrument in to_sell:\n context.order_target(context.symbol(instrument), 0)\n for instrument in to_buy:\n context.order_value(context.symbol(instrument), cash_for_buy)","type":"Literal","bound_global_parameter":null},{"name":"prepare","value":"def bigquant_run(context): \n from bigdata.api.datareader import D\n # 获取st状态和涨跌停状态\n context.status_df = D.features(instruments =context.instruments,start_date = context.start_date, end_date = context.end_date, \n fields=['st_status_0','price_limit_status_0','price_limit_status_1'])\n","type":"Literal","bound_global_parameter":null},{"name":"before_trading_start","value":"def bigquant_run(context, data): \n# 获取涨跌停状态数据\n df_price_limit_status=context.status_df.set_index('date')\n today=data.current_dt.strftime('%Y-%m-%d')\n # 得到当前未完成订单\n for orders in get_open_orders().values():\n # 循环,撤销订单\n for _order in orders:\n ins=str(_order.sid.symbol)\n try:\n #判断一下如果当日涨停,则取消卖单\n if df_price_limit_status[df_price_limit_status.instrument==ins].price_limit_status_0.loc[today]>2 and _order.amount<0:\n cancel_order(_order)\n print(today,'尾盘涨停取消卖单',ins) \n except:\n continue\n\n ","type":"Literal","bound_global_parameter":null},{"name":"volume_limit","value":0.025,"type":"Literal","bound_global_parameter":null},{"name":"order_price_field_buy","value":"open","type":"Literal","bound_global_parameter":null},{"name":"order_price_field_sell","value":"close","type":"Literal","bound_global_parameter":null},{"name":"capital_base","value":"1000000","type":"Literal","bound_global_parameter":null},{"name":"auto_cancel_non_tradable_orders","value":"True","type":"Literal","bound_global_parameter":null},{"name":"data_frequency","value":"daily","type":"Literal","bound_global_parameter":null},{"name":"price_type","value":"真实价格","type":"Literal","bound_global_parameter":null},{"name":"product_type","value":"股票","type":"Literal","bound_global_parameter":null},{"name":"plot_charts","value":"True","type":"Literal","bound_global_parameter":null},{"name":"backtest_only","value":"False","type":"Literal","bound_global_parameter":null},{"name":"benchmark","value":"000300.HIX","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-114"},{"name":"options_data","node_id":"-114"},{"name":"history_ds","node_id":"-114"},{"name":"benchmark_ds","node_id":"-114"},{"name":"trading_calendar","node_id":"-114"}],"output_ports":[{"name":"raw_perf","node_id":"-114"}],"cacheable":false,"seq_num":12,"comment":"","comment_collapsed":true,"x":-206,"y":445},{"node_id":"-113","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n),\n\nsql3 AS (SELECT * FROM sql1 JOIN sql2 USING (date, instrument))\nSELECT\n\t*,\n -- 计算的是未来5日的收益率。这是通过将5天后的收盘价除以第二天的开盘价得到的。这里使用的是一个叫做m_lead的函数,它可以获取某个字段在未来某天的值。\n -- 计算未来5日收益率的1%分位数。all_quantile_cont是一个分位数函数,它能够计算出某个字段值的分位数,这里是计算1%的分位数。\n c_quantile_cont(future_return, 0.01) AS _future_return_1pct,\n\n -- 计算未来5日收益率的99%分位数。同样,all_quantile_cont函数用来计算99%的分位数。\n c_quantile_cont(future_return, 0.99) AS _future_return_99pct,\n\n -- 对未来5日收益率进行截断处理,值位于1%和99%分位数之间的数据被保留,超过这个范围的值将被设为边界值。\n clip(future_return, _future_return_1pct, _future_return_99pct) AS _clipped_return,\n\n -- 将截断后的未来5日收益率分为20个不同的桶,每个桶代表一个范围。all_wbins函数用于将数据离散化为多个桶。\n cbins(_clipped_return, 20) AS _binned_return,\n\n -- 将离散化后的数据作为标签使用,这是我们预测的目标。\n _binned_return AS label\n-- 从cn_stock_bar1d这个表中选择数据,这个表存储的是股票的日线数据\nFROM sql3\n-- QUALIFY 用于数据过滤 \nQUALIFY\n COLUMNS(*) IS NOT NULL\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-113"},{"name":"sql2","node_id":"-113"}],"output_ports":[{"name":"data","node_id":"-113"}],"cacheable":true,"seq_num":13,"comment":"3. 合并因子和标注数据","comment_collapsed":true,"x":-584.9536437988281,"y":-328.8037109375},{"node_id":"-121","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n)\n\nSELECT * FROM sql1 JOIN sql2 USING (date, instrument)\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-121"},{"name":"sql2","node_id":"-121"}],"output_ports":[{"name":"data","node_id":"-121"}],"cacheable":true,"seq_num":15,"comment":"3. 合并因子和标注数据","comment_collapsed":true,"x":39.95826721191406,"y":-349.21795654296875},{"node_id":"-124","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"SELECT\ndate,\ninstrument,\nprice_limit_status,\nm_lag(price_limit_status,2) as price_limit_status_2\nFROM cn_stock_status\nORDER BY\ndate, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-124"}],"cacheable":true,"seq_num":16,"comment":"2. 编写因子","comment_collapsed":true,"x":206,"y":-514.4729309082031},{"node_id":"-170","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n)\n\nSELECT * FROM sql1 JOIN sql2 USING (date, instrument)\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-170"},{"name":"sql2","node_id":"-170"}],"output_ports":[{"name":"data","node_id":"-170"}],"cacheable":true,"seq_num":27,"comment":"","comment_collapsed":true,"x":115.54866027832031,"y":-196.88255310058594},{"node_id":"-128","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考\n-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B\nSELECT * FROM(\nSELECT\n\n -- 在这里输入因子表达式\n -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0\n -- 数据&字段: 数据文档 https://bigquant.com/data/home\n (m_lag(close, 1) + m_lag(close, 2))/2 AS avg_close_2,\n close,\n \topen,\n \thigh,\n \tlow,\n -- 日期和股票代码\n date, instrument\n\n-- 预计算因子 cn_stock_bar1d https://bigquant.com/data/datasources/cn_stock_bar1d\nFROM cn_stock_bar1d\n\nWHERE\n -- 只选取在指定日期范围内的数据,这里多看90天,用于有的算子需要更早的数据\n date BETWEEN DATE '{start_date}' - INTERVAL 90 DAY AND '{end_date}'\n -- 去掉有空值的行\n AND COLUMNS(*) IS NOT NULL\n-- 窗口函数内容过滤需要放在 QUALIFY 这里\nQUALIFY\n -- 去掉有空值的行,如果用到窗口函数(比如 m_lag),这里需要删除,不然会报错\n COLUMNS(*) IS NOT NULL\n\n-- 按日期和股票代码排序,从小到大\nORDER BY date, instrument)\nWHERE\n\t(close/avg_close_2 -1)>0.1\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-128"}],"cacheable":true,"seq_num":18,"comment":"2. 编写因子","comment_collapsed":true,"x":387.6568603515625,"y":-380.5733947753906},{"node_id":"-130","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考\n-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B\nSELECT * FROM(\nSELECT\n -- 在这里输入因子表达式\n -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0\n -- 数据&字段: 数据文档 https://bigquant.com/data/home\n (m_lag(close, 1) + m_lag(close, 2))/2 AS avg_close_2,\n close,\n \topen,\n \thigh,\n \tlow,\n -- 日期和股票代码\n date, instrument\n\n-- 预计算因子 cn_stock_bar1d https://bigquant.com/data/datasources/cn_stock_bar1d\nFROM cn_stock_bar1d\n\nWHERE\n -- 只选取在指定日期范围内的数据,这里多看90天,用于有的算子需要更早的数据\n date BETWEEN DATE '{start_date}' - INTERVAL 90 DAY AND '{end_date}'\n -- 去掉有空值的行\n AND COLUMNS(*) IS NOT NULL\n-- 窗口函数内容过滤需要放在 QUALIFY 这里\nQUALIFY\n -- 去掉有空值的行,如果用到窗口函数(比如 m_lag),这里需要删除,不然会报错\n COLUMNS(*) IS NOT NULL\n\n-- 按日期和股票代码排序,从小到大\nORDER BY date, instrument)\nWHERE\n\t(close/avg_close_2 -1)>0.1\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-130"}],"cacheable":true,"seq_num":20,"comment":"","comment_collapsed":true,"x":-514,"y":-754},{"node_id":"-106","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n)\n\nSELECT * FROM sql1 JOIN sql2 USING (date, instrument)\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-106"},{"name":"sql2","node_id":"-106"}],"output_ports":[{"name":"data","node_id":"-106"}],"cacheable":true,"seq_num":3,"comment":"3. 合并因子和标注数据","comment_collapsed":false,"x":-722,"y":-578}],"node_layout":"<node_postions><node_position Node='-115' Position='-887.5857238769531,-729.8284912109375,200,200'/><node_position Node='-118' Position='-218.2720184326172,-623.1422119140625,200,200'/><node_position Node='-110' Position='-258.94126892089844,219.88714599609375,200,200'/><node_position Node='-127' Position='-500.14678955078125,-189.39261627197266,200,200'/><node_position Node='-75' Position='-357.7325744628906,94.28900909423828,200,200'/><node_position Node='-88' Position='7.309146404266357,20.64450454711914,200,200'/><node_position Node='-79' Position='-481.7403564453125,-20.752729415893555,200,200'/><node_position Node='-96' Position='34.88255310058594,-87.28439712524414,200,200'/><node_position Node='-99' Position='-249,339,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-62' Position='-30,-715,200,200'/><node_position Node='-114' Position='-206,445,200,200'/><node_position Node='-113' Position='-584.9536437988281,-328.8037109375,200,200'/><node_position Node='-121' Position='39.95826721191406,-349.21795654296875,200,200'/><node_position Node='-124' Position='206,-514.4729309082031,200,200'/><node_position Node='-170' Position='115.54866027832031,-196.88255310058594,200,200'/><node_position Node='-128' Position='387.6568603515625,-380.5733947753906,200,200'/><node_position Node='-130' Position='-514,-754,200,200'/><node_position Node='-106' Position='-722,-578,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
    In [10]:
    # 本代码由可视化策略环境自动生成 2023年12月20日 14:27
    # 本代码单元只能在可视化模式下编辑。您也可以拷贝代码,粘贴到新建的代码单元或者策略,然后修改。
     
    # 显式导入 BigQuant 相关 SDK 模块
    from bigdatasource.api import DataSource
    from bigdata.api.datareader import D
    from biglearning.api import M
    from biglearning.api import tools as T
    from biglearning.module2.common.data import Outputs
     
    import pandas as pd
    import numpy as np
    import math
    import warnings
    import datetime
     
    from zipline.finance.commission import PerOrder
    from zipline.api import get_open_orders
    from zipline.api import symbol
     
    from bigtrader.sdk import *
    from bigtrader.utils.my_collections import NumPyDeque
    from bigtrader.constant import OrderType
    from bigtrader.constant import Direction
    
    # <aistudiograph>
    
    # @param(id="m7", name="run")
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m7_run_bigquant_run(input_1, input_2, input_3):
        # 示例代码如下。在这里编写您的代码
        df = input_1.read()
        df.drop(columns=['avg_close_2','close', 'low','open','high'], inplace=True)
    #     df = df[~df.instrument.astype(str).str.startswith('688') & ~df.instrument.astype(str).str.startswith('300')]
        df = df[~df.instrument.astype(str).str.startswith('688')]
        data_1 = DataSource.write_df(df)
        
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    
    # @param(id="m7", name="post_run")
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m7_post_run_bigquant_run(outputs):
        return outputs
    
    # @param(id="m9", name="run")
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m9_run_bigquant_run(input_1, input_2, input_3):
        # 示例代码如下。在这里编写您的代码
        df = input_1.read()
        # 将'label'列放到第一个位置
    #     df = df.reindex(columns=['label'] + [col for col in df.columns if col != 'label'])
        df.drop(columns=['avg_close_2','close', 'low','future_return','open','high'], inplace=True)
    #     df = df[~df.instrument.astype(str).str.startswith('688') & ~df.instrument.astype(str).str.startswith('300')]
        df = df[~df.instrument.astype(str).str.startswith('688')]
        data_1 = DataSource.write_df(df)
        
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    
    # @param(id="m9", name="post_run")
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m9_post_run_bigquant_run(outputs):
        return outputs
    
    # @param(id="m8", name="run")
    # Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端
    def m8_run_bigquant_run(input_1, input_2, input_3):
        # 示例代码如下。在这里编写您的代码
        df = input_1.read()
        df['instrument'] = df['instrument'].str.replace('.SZ', '.SZA')
        df['instrument'] = df['instrument'].str.replace('.SH', '.SHA')
        data_1 = DataSource.write_df(df)
        
        return Outputs(data_1=data_1, data_2=None, data_3=None)
    # @param(id="m8", name="post_run")
    # 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。
    def m8_post_run_bigquant_run(outputs):
        return outputs
    
    # @param(id="m12", name="initialize")
    # 回测引擎:初始化函数,只执行一次
    def m12_initialize_bigquant_run(context):
        # 加载预测数据
        context.ranker_prediction = context.options['data'].read_df()
    
        # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数
        context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
        # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)
        # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只
        stock_count = 1
        # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]
        context.stock_weights = [1]
        # 设置每只股票占用的最大资金比例
        context.max_cash_per_instrument = 1
        context.options['hold_days'] = 1
    
    # @param(id="m12", name="handle_data")
    # 回测引擎:每日数据处理函数,每天执行一次
    def m12_handle_data_bigquant_run(context, data):
        # 按日期过滤得到今日的预测数据
        ranker_prediction = context.ranker_prediction[
            context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]
        cash_for_buy = min(context.portfolio.portfolio_value/1,context.portfolio.cash)
        buy_instruments = list(ranker_prediction.instrument)
        sell_instruments = [instrument.symbol for instrument in context.portfolio.positions.keys()]
        to_buy = set(buy_instruments[:1]) - set(sell_instruments) 
        to_sell = set(sell_instruments) -  set(buy_instruments[:1])
        
        for instrument in to_sell:
            context.order_target(context.symbol(instrument), 0)
        for instrument in to_buy:
            context.order_value(context.symbol(instrument), cash_for_buy)
    # @param(id="m12", name="prepare")
    def m12_prepare_bigquant_run(context):  
        from bigdata.api.datareader import D
         # 获取st状态和涨跌停状态
        context.status_df = D.features(instruments =context.instruments,start_date = context.start_date, end_date = context.end_date, 
                               fields=['st_status_0','price_limit_status_0','price_limit_status_1'])
    
    # @param(id="m12", name="before_trading_start")
    def m12_before_trading_start_bigquant_run(context, data):  
    #     获取涨跌停状态数据
        df_price_limit_status=context.status_df.set_index('date')
        today=data.current_dt.strftime('%Y-%m-%d')
        # 得到当前未完成订单
        for orders in get_open_orders().values():
            # 循环,撤销订单
            for _order in orders:
                ins=str(_order.sid.symbol)
                try:
                    #判断一下如果当日涨停,则取消卖单
                    if  df_price_limit_status[df_price_limit_status.instrument==ins].price_limit_status_0.loc[today]>2 and _order.amount<0:
                        cancel_order(_order)
                        print(today,'尾盘涨停取消卖单',ins) 
                except:
                    continue
    
      
    
    # @module(position="-887.5857238769531,-729.8284912109375", comment='1. 设置预测目标,例如:根据未来5日收益率预测', comment_collapsed=False)
    m1 = M.input_features_dai.v6(
        sql="""/*
    使用DAI SQL为量化模型预测生成标签数据。标签反映了未来5日的收益率,并且被离散化为20个桶,每个桶代表一个收益率范围。这样,我们就可以训练模型来预测未来的收益率范围,而不仅仅是具体的收益率值。
    
    1. 首先定义了一个名为label_data的临时表,用于计算和存储未来5日收益率,其1%和99%分位数,以及离散化后的收益率(被分为20个桶,每个桶代表一个收益率范围)。
    2. 对未来5日收益率进行了截断处理,只保留在1%和99%分位数之间的值。
    3. 选择了标签值不为空,并且非涨跌停(未来一天的最高价不等于最低价)的数据
    4. 从这个临时表中选择了日期、股票代码和标签字段,以供进模型训练。
    */
    
    SELECT
        -- 计算的是未来5日的收益率。这是通过将5天后的收盘价除以第二天的开盘价得到的。这里使用的是一个叫做m_lead的函数,它可以获取某个字段在未来某天的值。
        -- _future_return 是一个中间变量名,以 _ 开始的别名列不会在最终结果返回
        m_lead(close, 2) / m_lead(open, 1) AS future_return,
        -- 日期,这是每个股票每天的数据
        date,
        -- 股票代码,代表每一支股票
        instrument
    -- 从cn_stock_bar1d这个表中选择数据,这个表存储的是股票的日线数据
    FROM cn_stock_bar1d
    -- QUALIFY 用于数据过滤 
    QUALIFY
        COLUMNS(*) IS NOT NULL
        -- 标签值不为空,且非涨跌停(未来一天的最高价不等于最低价)
        AND m_lead(high, 1) != m_lead(low, 1)
    ORDER BY date, instrument"""
    )
    
    # @module(position="-218.2720184326172,-623.1422119140625", comment='2. 编写因子', comment_collapsed=False)
    m2 = M.input_features_dai.v6(
        sql="""-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考
    -- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B
    SELECT
    *
    FROM(
    SELECT
        -- 在这里输入因子表达式
        -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
        -- 数据&字段: 数据文档 https://bigquant.com/data/home
        -- 在时间截面的total_market_cap排名
        -- _return_0 是中间变量,以下划线开始的变量是中间值,不会出现在最后的因子里
        m_max(high_0, 5)/m_min(close_0, 5),
        m_CORR(high_0, close_0, 10),
        avg_amount_5/avg_amount_10,
        rank_swing_volatility_5/rank_swing_volatility_30,
      	volatility_5
        avg_main_rate_5,
        netflow_xl_rate_0,
        main_rate_0,
        ta_wma_10,
        ta_wma_5,
      	ta_rsi_14,
        amount_0/avg_amount_5,
        ta_ema_5,
        money_netflow_0/avg_money_netflow_5,
        return_5/return_10,
        rank_amount_0/rank_avg_amount_5,
        rank_avg_amount_5/rank_avg_amount_10,
        rank_return_0/rank_return_5,
        turn_0/avg_turn_3,
        turn_0/avg_turn_5,
    	rank_avg_turn_5,
        -- 日期和股票代码
        date,
        instrument
    -- 预计算因子和数据 cn_stock_factors https://bigquant.com/data/datasources/cn_stock_factors
    FROM cn_stock_factors
    WHERE 
        -- 剔除ST股票
        st_status = 0
        -- 非停牌股
        AND suspended = 0
        -- 不属于北交所
        AND list_sector < 4
    -- 窗口函数内容过滤需要放在 QUALIFY 这里
    QUALIFY
        -- 去掉有空值的行
        COLUMNS(*) IS NOT NULL
    -- 按日期、股票代码排序
    ORDER BY date, instrument)
    AS SubQuery
    ORDER BY date, instrument"""
    )
    
    # @module(position="-30,-715", comment='预测数据,用于回测和模拟', comment_collapsed=True)
    m11 = M.instruments.v2(
        start_date='2023-01-01',
        end_date='2023-12-17',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    # @module(position="206,-514.4729309082031", comment='2. 编写因子', comment_collapsed=True)
    m16 = M.input_features_dai.v6(
        sql="""SELECT
    date,
    instrument,
    price_limit_status,
    m_lag(price_limit_status,2) as price_limit_status_2
    FROM cn_stock_status
    ORDER BY
    date, instrument
    """
    )
    
    # @module(position="39.95826721191406,-349.21795654296875", comment='3. 合并因子和标注数据', comment_collapsed=True)
    m15 = M.sql_join_2.v1(
        sql1=m2.data,
        sql2=m16.data,
        sql_join="""WITH
    sql1 AS (
        {sql1}
    ),
    sql2 AS (
        {sql2}
    )
    
    SELECT * FROM sql1 JOIN sql2 USING (date, instrument)
    ORDER BY date, instrument
    """
    )
    
    # @module(position="387.6568603515625,-380.5733947753906", comment='2. 编写因子', comment_collapsed=True)
    m18 = M.input_features_dai.v6(
        sql="""-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考
    -- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B
    SELECT * FROM(
    SELECT
    
        -- 在这里输入因子表达式
        -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
        -- 数据&字段: 数据文档 https://bigquant.com/data/home
        (m_lag(close, 1) + m_lag(close, 2))/2 AS avg_close_2,
        close,
      	open,
      	high,
      	low,
        -- 日期和股票代码
        date, instrument
    
    -- 预计算因子 cn_stock_bar1d https://bigquant.com/data/datasources/cn_stock_bar1d
    FROM cn_stock_bar1d
    
    WHERE
        -- 只选取在指定日期范围内的数据,这里多看90天,用于有的算子需要更早的数据
        date BETWEEN DATE '{start_date}' - INTERVAL 90 DAY AND '{end_date}'
        -- 去掉有空值的行
        AND COLUMNS(*) IS NOT NULL
    -- 窗口函数内容过滤需要放在 QUALIFY 这里
    QUALIFY
        -- 去掉有空值的行,如果用到窗口函数(比如 m_lag),这里需要删除,不然会报错
        COLUMNS(*) IS NOT NULL
    
    -- 按日期和股票代码排序,从小到大
    ORDER BY date, instrument)
    WHERE
    	(close/avg_close_2 -1)>0.1
    ORDER BY date, instrument
    """
    )
    
    # @module(position="115.54866027832031,-196.88255310058594", comment='', comment_collapsed=True)
    m27 = M.sql_join_2.v1(
        sql1=m15.data,
        sql2=m18.data,
        sql_join="""WITH
    sql1 AS (
        {sql1}
    ),
    sql2 AS (
        {sql2}
    )
    
    SELECT * FROM sql1 JOIN sql2 USING (date, instrument)
    """
    )
    
    # @module(position="34.88255310058594,-87.28439712524414", comment='6. 预测数据,设置预测时间,开启模拟交易时绑定交易日期', comment_collapsed=True)
    m10 = M.extract_data_dai.v7(
        sql=m27.data,
        start_date='2023-01-01',
        start_date_bound_to_trading_date=True,
        end_date='2023-12-17',
        end_date_bound_to_trading_date=True,
        before_start_days=90,
        debug=False
    )
    
    # @module(position="7.309146404266357,20.64450454711914", comment='', comment_collapsed=True)
    m7 = M.cached.v3(
        input_1=m10.data,
        run=m7_run_bigquant_run,
        post_run=m7_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports=''
    )
    
    # @module(position="-514,-754", comment='', comment_collapsed=True)
    m20 = M.input_features_dai.v6(
        sql="""-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考
    -- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B
    SELECT * FROM(
    SELECT
        -- 在这里输入因子表达式
        -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
        -- 数据&字段: 数据文档 https://bigquant.com/data/home
        (m_lag(close, 1) + m_lag(close, 2))/2 AS avg_close_2,
        close,
      	open,
      	high,
      	low,
        -- 日期和股票代码
        date, instrument
    
    -- 预计算因子 cn_stock_bar1d https://bigquant.com/data/datasources/cn_stock_bar1d
    FROM cn_stock_bar1d
    
    WHERE
        -- 只选取在指定日期范围内的数据,这里多看90天,用于有的算子需要更早的数据
        date BETWEEN DATE '{start_date}' - INTERVAL 90 DAY AND '{end_date}'
        -- 去掉有空值的行
        AND COLUMNS(*) IS NOT NULL
    -- 窗口函数内容过滤需要放在 QUALIFY 这里
    QUALIFY
        -- 去掉有空值的行,如果用到窗口函数(比如 m_lag),这里需要删除,不然会报错
        COLUMNS(*) IS NOT NULL
    
    -- 按日期和股票代码排序,从小到大
    ORDER BY date, instrument)
    WHERE
    	(close/avg_close_2 -1)>0.1
    ORDER BY date, instrument
    """
    )
    
    # @module(position="-722,-578", comment='3. 合并因子和标注数据', comment_collapsed=False)
    m3 = M.sql_join_2.v1(
        sql1=m1.data,
        sql2=m20.data,
        sql_join="""WITH
    sql1 AS (
        {sql1}
    ),
    sql2 AS (
        {sql2}
    )
    
    SELECT * FROM sql1 JOIN sql2 USING (date, instrument)
    ORDER BY date, instrument
    """
    )
    
    # @module(position="-584.9536437988281,-328.8037109375", comment='3. 合并因子和标注数据', comment_collapsed=True)
    m13 = M.sql_join_2.v1(
        sql1=m3.data,
        sql2=m2.data,
        sql_join="""WITH
    sql1 AS (
        {sql1}
    ),
    sql2 AS (
        {sql2}
    ),
    
    sql3 AS (SELECT * FROM sql1 JOIN sql2 USING (date, instrument))
    SELECT
    	*,
        -- 计算的是未来5日的收益率。这是通过将5天后的收盘价除以第二天的开盘价得到的。这里使用的是一个叫做m_lead的函数,它可以获取某个字段在未来某天的值。
        -- 计算未来5日收益率的1%分位数。all_quantile_cont是一个分位数函数,它能够计算出某个字段值的分位数,这里是计算1%的分位数。
        c_quantile_cont(future_return, 0.01) AS _future_return_1pct,
    
        -- 计算未来5日收益率的99%分位数。同样,all_quantile_cont函数用来计算99%的分位数。
        c_quantile_cont(future_return, 0.99) AS _future_return_99pct,
    
        -- 对未来5日收益率进行截断处理,值位于1%和99%分位数之间的数据被保留,超过这个范围的值将被设为边界值。
        clip(future_return, _future_return_1pct, _future_return_99pct) AS _clipped_return,
    
        -- 将截断后的未来5日收益率分为20个不同的桶,每个桶代表一个范围。all_wbins函数用于将数据离散化为多个桶。
        cbins(_clipped_return, 20) AS _binned_return,
    
        -- 将离散化后的数据作为标签使用,这是我们预测的目标。
        _binned_return AS label
    -- 从cn_stock_bar1d这个表中选择数据,这个表存储的是股票的日线数据
    FROM sql3
    -- QUALIFY 用于数据过滤 
    QUALIFY
        COLUMNS(*) IS NOT NULL
    ORDER BY date, instrument
    """
    )
    
    # @module(position="-500.14678955078125,-189.39261627197266", comment='4. 训练数据,设置训练开始时间和结束时间', comment_collapsed=False)
    m4 = M.extract_data_dai.v7(
        sql=m13.data,
        start_date='2016-01-01',
        start_date_bound_to_trading_date=False,
        end_date='2022-12-31',
        end_date_bound_to_trading_date=False,
        before_start_days=90,
        debug=False
    )
    
    # @module(position="-481.7403564453125,-20.752729415893555", comment='', comment_collapsed=True)
    m9 = M.cached.v3(
        input_1=m4.data,
        run=m9_run_bigquant_run,
        post_run=m9_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports=''
    )
    
    # @module(position="-357.7325744628906,94.28900909423828", comment='5. 使用StockRanker算法训练', comment_collapsed=False)
    m5 = M.stock_ranker_dai_train.v2(
        data=m9.data_1,
        learning_algorithm='排序',
        number_of_leaves=30,
        min_docs_per_leaf=1000,
        number_of_trees=20,
        learning_rate=0.1,
        max_bins=1023,
        feature_fraction=1,
        data_row_fraction=1,
        plot_charts=True,
        ndcg_discount_base=1
    )
    
    # @module(position="-258.94126892089844,219.88714599609375", comment='7. 预测', comment_collapsed=False)
    m14 = M.stock_ranker_dai_predict.v3(
        model=m5.model,
        data=m7.data_1
    )
    
    # @module(position="-249,339", comment='', comment_collapsed=True)
    m8 = M.cached.v3(
        input_1=m14.predictions,
        run=m8_run_bigquant_run,
        post_run=m8_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports=''
    )
    
    # @module(position="-206,445", comment='', comment_collapsed=True)
    m12 = M.trade.v4(
        instruments=m11.data,
        options_data=m8.data_1,
        start_date='',
        end_date='',
        initialize=m12_initialize_bigquant_run,
        handle_data=m12_handle_data_bigquant_run,
        prepare=m12_prepare_bigquant_run,
        before_trading_start=m12_before_trading_start_bigquant_run,
        volume_limit=0.025,
        order_price_field_buy='open',
        order_price_field_sell='close',
        capital_base=1000000,
        auto_cancel_non_tradable_orders=True,
        data_frequency='daily',
        price_type='真实价格',
        product_type='股票',
        plot_charts=True,
        backtest_only=False,
        benchmark='000300.HIX'
    )
    # </aistudiograph>
    
    ---------------------------------------------------------------------------
    IndexError                                Traceback (most recent call last)
    Cell In[10], line 455
        445 m9 = M.cached.v3(
        446     input_1=m4.data,
        447     run=m9_run_bigquant_run,
       (...)
        451     output_ports=''
        452 )
        454 # @module(position="-357.7325744628906,94.28900909423828", comment='5. 使用StockRanker算法训练', comment_collapsed=False)
    --> 455 m5 = M.stock_ranker_dai_train.v2(
        456     data=m9.data_1,
        457     learning_algorithm='排序',
        458     number_of_leaves=30,
        459     min_docs_per_leaf=1000,
        460     number_of_trees=20,
        461     learning_rate=0.1,
        462     max_bins=1023,
        463     feature_fraction=1,
        464     data_row_fraction=1,
        465     plot_charts=True,
        466     ndcg_discount_base=1
        467 )
        469 # @module(position="-258.94126892089844,219.88714599609375", comment='7. 预测', comment_collapsed=False)
        470 m14 = M.stock_ranker_dai_predict.v3(
        471     model=m5.model,
        472     data=m7.data_1
        473 )
    
    File module2/common/modulemanagerv2.py:88, in biglearning.module2.common.modulemanagerv2.BigQuantModuleVersion.__call__()
    
    File module2/common/moduleinvoker.py:370, in biglearning.module2.common.moduleinvoker.module_invoke()
    
    File module2/common/moduleinvoker.py:292, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
    
    File module2/common/moduleinvoker.py:253, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
    
    File module2/common/moduleinvoker.py:210, in biglearning.module2.common.moduleinvoker._module_run()
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:913, in run(data, validation_ds, learning_algorithm, number_of_leaves, min_docs_per_leaf, number_of_trees, learning_rate, max_bins, feature_fraction, data_row_fraction, plot_charts, ndcg_discount_base, base_model)
        910 validation_data = validation_ds.read() if validation_ds is not None else None
        911 logger.info(f"data loaded: train={data.shape}, validation={validation_data.shape if validation_data is not None else None}")
    --> 913 result = train(
        914     features=None,
        915     data=data,
        916     validation_data=validation_data,
        917     learning_algorithm=learning_algorithm,
        918     learning_rate=learning_rate,
        919     max_bins=max_bins,
        920     number_of_trees=number_of_trees,
        921     number_of_leaves=number_of_leaves,
        922     min_docs_per_leaf=min_docs_per_leaf,
        923     feature_fraction=feature_fraction,
        924     data_row_fraction=data_row_fraction,
        925     ndcg_discount_base=ndcg_discount_base,
        926     base_model_id=None,
        927     # fai_cluster=None,
        928 )
        930 return Outputs(
        931     model=dai.DataSource.write_text(json.dumps(result["model"])),
        932     feature_gains=dai.DataSource.write_bdb(result["feature_gains"]),
       (...)
        935     plot_charts=plot_charts,
        936 )
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:876, in train(features, data, validation_data, learning_algorithm, learning_rate, max_bins, number_of_trees, number_of_leaves, min_docs_per_leaf, feature_fraction, data_row_fraction, ndcg_discount_base, base_model_id, fai_cluster, **kwargs)
        874     result = fai.get(fai.remote(_train).remote(**kwargs))
        875 else:
    --> 876     result = _train(**kwargs)
        877 return result
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:691, in train.<locals>._train(features, data, validation_data, learning_algorithm, learning_rate, max_bins, number_of_trees, number_of_leaves, min_docs_per_leaf, feature_fraction, data_row_fraction, ndcg_discount_base, base_model_id)
        689 # TODO: 优化日志
        690 logger.info("数据预处理 ..")
    --> 691 bin_data = df_to_bin(
        692     working_root=working_root,
        693     dcg_trunc=10,
        694     na_label=None,
        695     sort=True,
        696     bins="training",
        697     max_bins=max_bins,
        698     selected_columns=feature_names,
        699     training=data,
        700     validation=validation_data,
        701 )
        702 logger.info(f"训练集预处理完成 rows={bin_data.training_rows}, cols={bin_data.training_cols}")
        703 if validation_data is not None:
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:568, in train.<locals>.df_to_bin(working_root, dcg_trunc, na_label, bins, max_bins, selected_columns, sort, **kwargs)
        566     output["%s_rows" % name] = len(df)
        567     output["%s_cols" % name] = len(df.columns)
    --> 568     output["%s_bin_path" % name] = __convert_to_bin(f"{working_root}/{name}.bin", df, rating_to_label, label_gains, dcg_trunc)
        570 return Outputs(**output)
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:480, in train.<locals>.__convert_to_bin(output_path, df, rating_to_label, label_gains, dcg_trunc)
        479 def __convert_to_bin(output_path, df, rating_to_label, label_gains, dcg_trunc):
    --> 480     dataframe_to_bin(
        481         df,
        482         output_path,
        483         rating_to_label=rating_to_label and dict(rating_to_label),
        484         label_gains=label_gains,
        485         dcg_trunc=dcg_trunc,
        486     )
        487     return output_path
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:465, in train.<locals>.dataframe_to_bin(df, output_bin_path, rating_to_label, label_gains, dcg_trunc, label_column)
        454 def dataframe_to_bin(df, output_bin_path, rating_to_label=None, label_gains=None, dcg_trunc=10, label_column="label"):
        455     """转化为stockranker训练数据格式
        456 
        457     Args:
       (...)
        463         label_column: TODO
        464     """
    --> 465     return DataFrame2Bin(rating_to_label, label_gains, dcg_trunc, label_column).convert(df, output_bin_path)
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:425, in train.<locals>.DataFrame2Bin.convert(self, df, output_bin_path)
        422     self.__writer = output_bin_path
        423     self.__require_close = False
    --> 425 self.__calculate()
        427 self.__writer.seek(self.__header_size, SEEK_CUR)
        428 self.__write_dataset_skeleton()
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:328, in train.<locals>.DataFrame2Bin.__calculate(self)
        326 self.__query_ids = self.__df["m:QueryId"].drop_duplicates()
        327 self.__boundaries = list(self.__query_ids.index) + [self.__num_docs]
    --> 328 self.__max_dcg = self.__max_dcg_range()
        330 self.__doc_to_query = [0] * len(self.__doc_ids)
        331 for q in range(0, len(self.__query_ids)):
    
    File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:300, in train.<locals>.DataFrame2Bin.__max_dcg_range(self)
        298     label_counts[i] = 0
        299 for k in range(self.__boundaries[q], self.__boundaries[q + 1]):
    --> 300     label = self.__labels[k]
        301     label_counts[label] += 1
        303 top_label = relevancy_level - 1
    
    IndexError: list index out of range