{"description":"实验创建于2023/2/10","graph":{"edges":[{"to_node_id":"-106:sql1","from_node_id":"-115:data"},{"to_node_id":"-113:sql2","from_node_id":"-118:data"},{"to_node_id":"-121:sql1","from_node_id":"-118:data"},{"to_node_id":"-99:input_1","from_node_id":"-110:predictions"},{"to_node_id":"-79:input_1","from_node_id":"-127:data"},{"to_node_id":"-110:model","from_node_id":"-75:model"},{"to_node_id":"-110:data","from_node_id":"-88:data_1"},{"to_node_id":"-75:data","from_node_id":"-79:data_1"},{"to_node_id":"-88:input_1","from_node_id":"-96:data"},{"to_node_id":"-114:options_data","from_node_id":"-99:data_1"},{"to_node_id":"-114:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62:data"},{"to_node_id":"-127:sql","from_node_id":"-113:data"},{"to_node_id":"-170:sql1","from_node_id":"-121:data"},{"to_node_id":"-121:sql2","from_node_id":"-124:data"},{"to_node_id":"-96:sql","from_node_id":"-170:data"},{"to_node_id":"-170:sql2","from_node_id":"-128:data"},{"to_node_id":"-106:sql2","from_node_id":"-130:data"},{"to_node_id":"-113:sql1","from_node_id":"-106:data"}],"nodes":[{"node_id":"-115","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"/*\n使用DAI SQL为量化模型预测生成标签数据。标签反映了未来5日的收益率,并且被离散化为20个桶,每个桶代表一个收益率范围。这样,我们就可以训练模型来预测未来的收益率范围,而不仅仅是具体的收益率值。\n\n1. 首先定义了一个名为label_data的临时表,用于计算和存储未来5日收益率,其1%和99%分位数,以及离散化后的收益率(被分为20个桶,每个桶代表一个收益率范围)。\n2. 对未来5日收益率进行了截断处理,只保留在1%和99%分位数之间的值。\n3. 选择了标签值不为空,并且非涨跌停(未来一天的最高价不等于最低价)的数据\n4. 
从这个临时表中选择了日期、股票代码和标签字段,以供进模型训练。\n*/\n\nSELECT\n -- 计算的是未来收益率:用后天(T+2)的收盘价除以明天(T+1)的开盘价,即次日开盘买入、再下一交易日收盘卖出的收益。这里使用的是一个叫做m_lead的函数,它可以获取某个字段在未来某天的值。\n -- future_return 不以 _ 开头,因此会作为结果列返回,供下游合并节点计算分位数和标签使用\n m_lead(close, 2) / m_lead(open, 1) AS future_return,\n -- 日期,这是每个股票每天的数据\n date,\n -- 股票代码,代表每一支股票\n instrument\n-- 从cn_stock_bar1d这个表中选择数据,这个表存储的是股票的日线数据\nFROM cn_stock_bar1d\n-- QUALIFY 用于数据过滤 \nQUALIFY\n COLUMNS(*) IS NOT NULL\n -- 标签值不为空,且非涨跌停(未来一天的最高价不等于最低价)\n AND m_lead(high, 1) != m_lead(low, 1)\nORDER BY date, instrument","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-115"}],"cacheable":true,"seq_num":1,"comment":"1. 设置预测目标,例如:根据未来5日收益率预测","comment_collapsed":false,"x":-887.5857238769531,"y":-729.8284912109375},{"node_id":"-118","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考\n-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B\nSELECT\n*\nFROM(\nSELECT\n -- 在这里输入因子表达式\n -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0\n -- 数据&字段: 数据文档 https://bigquant.com/data/home\n -- 在时间截面的total_market_cap排名\n -- _return_0 是中间变量,以下划线开始的变量是中间值,不会出现在最后的因子里\n m_max(high_0, 5)/m_min(close_0, 5),\n m_CORR(high_0, close_0, 10),\n avg_amount_5/avg_amount_10,\n rank_swing_volatility_5/rank_swing_volatility_30,\n \tvolatility_5,\n avg_main_rate_5,\n netflow_xl_rate_0,\n main_rate_0,\n ta_wma_10,\n ta_wma_5,\n \tta_rsi_14,\n amount_0/avg_amount_5,\n ta_ema_5,\n money_netflow_0/avg_money_netflow_5,\n return_5/return_10,\n rank_amount_0/rank_avg_amount_5,\n rank_avg_amount_5/rank_avg_amount_10,\n rank_return_0/rank_return_5,\n turn_0/avg_turn_3,\n turn_0/avg_turn_5,\n\trank_avg_turn_5,\n -- 日期和股票代码\n date,\n instrument\n-- 预计算因子和数据 cn_stock_factors https://bigquant.com/data/datasources/cn_stock_factors\nFROM cn_stock_factors\nWHERE \n -- 剔除ST股票\n st_status = 0\n -- 非停牌股\n 
AND suspended = 0\n -- 不属于北交所\n AND list_sector < 4\n-- 窗口函数内容过滤需要放在 QUALIFY 这里\nQUALIFY\n -- 去掉有空值的行\n COLUMNS(*) IS NOT NULL\n-- 按日期、股票代码排序\nORDER BY date, instrument)\nAS SubQuery\nORDER BY date, instrument","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-118"}],"cacheable":true,"seq_num":2,"comment":"2. 编写因子","comment_collapsed":false,"x":-218.2720184326172,"y":-623.1422119140625},{"node_id":"-110","module_id":"BigQuantSpace.stock_ranker_dai_predict.stock_ranker_dai_predict-v3","parameters":[],"input_ports":[{"name":"model","node_id":"-110"},{"name":"data","node_id":"-110"}],"output_ports":[{"name":"predictions","node_id":"-110"}],"cacheable":true,"seq_num":14,"comment":"7. 预测","comment_collapsed":false,"x":-258.94126892089844,"y":219.88714599609375},{"node_id":"-127","module_id":"BigQuantSpace.extract_data_dai.extract_data_dai-v7","parameters":[{"name":"start_date","value":"2016-01-01","type":"Literal","bound_global_parameter":null},{"name":"start_date_bound_to_trading_date","value":"False","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2022-12-31","type":"Literal","bound_global_parameter":null},{"name":"end_date_bound_to_trading_date","value":"False","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":90,"type":"Literal","bound_global_parameter":null},{"name":"debug","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql","node_id":"-127"}],"output_ports":[{"name":"data","node_id":"-127"}],"cacheable":true,"seq_num":4,"comment":"4. 
训练数据,设置训练开始时间和结束时间","comment_collapsed":false,"x":-500.14678955078125,"y":-189.39261627197266},{"node_id":"-75","module_id":"BigQuantSpace.stock_ranker_dai_train.stock_ranker_dai_train-v2","parameters":[{"name":"learning_algorithm","value":"排序","type":"Literal","bound_global_parameter":null},{"name":"number_of_leaves","value":"30","type":"Literal","bound_global_parameter":null},{"name":"min_docs_per_leaf","value":"1000","type":"Literal","bound_global_parameter":null},{"name":"number_of_trees","value":"20","type":"Literal","bound_global_parameter":null},{"name":"learning_rate","value":"0.1","type":"Literal","bound_global_parameter":null},{"name":"max_bins","value":"1023","type":"Literal","bound_global_parameter":null},{"name":"feature_fraction","value":"1","type":"Literal","bound_global_parameter":null},{"name":"data_row_fraction","value":"1","type":"Literal","bound_global_parameter":null},{"name":"plot_charts","value":"True","type":"Literal","bound_global_parameter":null},{"name":"ndcg_discount_base","value":"1","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"data","node_id":"-75"},{"name":"validation_ds","node_id":"-75"},{"name":"base_model","node_id":"-75"}],"output_ports":[{"name":"model","node_id":"-75"}],"cacheable":true,"seq_num":5,"comment":"5. 
使用StockRanker算法训练","comment_collapsed":false,"x":-357.7325744628906,"y":94.28900909423828},{"node_id":"-88","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read()\n df.drop(columns=['avg_close_2','close', 'low','open','high'], inplace=True)\n# df = df[~df.instrument.astype(str).str.startswith('688') & ~df.instrument.astype(str).str.startswith('300')]\n df = df[~df.instrument.astype(str).str.startswith('688')]\n data_1 = DataSource.write_df(df)\n \n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-88"},{"name":"input_2","node_id":"-88"},{"name":"input_3","node_id":"-88"}],"output_ports":[{"name":"data_1","node_id":"-88"},{"name":"data_2","node_id":"-88"},{"name":"data_3","node_id":"-88"}],"cacheable":true,"seq_num":7,"comment":"","comment_collapsed":true,"x":7.309146404266357,"y":20.64450454711914},{"node_id":"-79","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read()\n # 将'label'列放到第一个位置\n# df = df.reindex(columns=['label'] + [col for col in df.columns if col != 'label'])\n df.drop(columns=['avg_close_2','close', 'low','future_return','open','high'], inplace=True)\n# df = 
df[~df.instrument.astype(str).str.startswith('688') & ~df.instrument.astype(str).str.startswith('300')]\n # 过滤后重置为连续的位置索引:stock_ranker_dai_train 会把 index 当作行位置使用,索引不连续会触发 IndexError: list index out of range\n df = df[~df.instrument.astype(str).str.startswith('688')].reset_index(drop=True)\n data_1 = DataSource.write_df(df)\n \n return Outputs(data_1=data_1, data_2=None, data_3=None)\n","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-79"},{"name":"input_2","node_id":"-79"},{"name":"input_3","node_id":"-79"}],"output_ports":[{"name":"data_1","node_id":"-79"},{"name":"data_2","node_id":"-79"},{"name":"data_3","node_id":"-79"}],"cacheable":true,"seq_num":9,"comment":"","comment_collapsed":true,"x":-481.7403564453125,"y":-20.752729415893555},{"node_id":"-96","module_id":"BigQuantSpace.extract_data_dai.extract_data_dai-v7","parameters":[{"name":"start_date","value":"2023-01-01","type":"Literal","bound_global_parameter":null},{"name":"start_date_bound_to_trading_date","value":"True","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2023-12-17","type":"Literal","bound_global_parameter":null},{"name":"end_date_bound_to_trading_date","value":"True","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":90,"type":"Literal","bound_global_parameter":null},{"name":"debug","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql","node_id":"-96"}],"output_ports":[{"name":"data","node_id":"-96"}],"cacheable":true,"seq_num":10,"comment":"6. 
预测数据,设置预测时间,开启模拟交易时绑定交易日期","comment_collapsed":true,"x":34.88255310058594,"y":-87.28439712524414},{"node_id":"-99","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n df = input_1.read()\n df['instrument'] = df['instrument'].str.replace('.SZ', '.SZA')\n df['instrument'] = df['instrument'].str.replace('.SH', '.SHA')\n data_1 = DataSource.write_df(df)\n \n return Outputs(data_1=data_1, data_2=None, data_3=None)","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-99"},{"name":"input_2","node_id":"-99"},{"name":"input_3","node_id":"-99"}],"output_ports":[{"name":"data_1","node_id":"-99"},{"name":"data_2","node_id":"-99"},{"name":"data_3","node_id":"-99"}],"cacheable":true,"seq_num":8,"comment":"","comment_collapsed":true,"x":-249,"y":339},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2023-01-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2023-12-17","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"287d2cb0-f53c-4101-bdf8-104
b137c8601-62"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62"}],"cacheable":true,"seq_num":11,"comment":"预测数据,用于回测和模拟","comment_collapsed":true,"x":-30,"y":-715},{"node_id":"-114","module_id":"BigQuantSpace.trade.trade-v4","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"initialize","value":"# 回测引擎:初始化函数,只执行一次\ndef bigquant_run(context):\n # 加载预测数据\n context.ranker_prediction = context.options['data'].read_df()\n\n # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数\n context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))\n # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)\n # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只\n stock_count = 1\n # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]\n context.stock_weights = [1]\n # 设置每只股票占用的最大资金比例\n context.max_cash_per_instrument = 1\n context.options['hold_days'] = 1\n","type":"Literal","bound_global_parameter":null},{"name":"handle_data","value":"# 回测引擎:每日数据处理函数,每天执行一次\ndef bigquant_run(context, data):\n # 按日期过滤得到今日的预测数据\n ranker_prediction = context.ranker_prediction[\n context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]\n cash_for_buy = min(context.portfolio.portfolio_value/1,context.portfolio.cash)\n buy_instruments = list(ranker_prediction.instrument)\n sell_instruments = [instrument.symbol for instrument in context.portfolio.positions.keys()]\n to_buy = set(buy_instruments[:1]) - set(sell_instruments) \n to_sell = set(sell_instruments) - set(buy_instruments[:1])\n \n for instrument in to_sell:\n context.order_target(context.symbol(instrument), 0)\n for instrument in to_buy:\n context.order_value(context.symbol(instrument), cash_for_buy)","type":"Literal","bound_global_parameter":null},{"name":"prepare","value":"def bigquant_run(context): \n from bigdata.api.datareader import D\n # 获取st状态和涨跌停状态\n context.status_df = 
D.features(instruments =context.instruments,start_date = context.start_date, end_date = context.end_date, \n fields=['st_status_0','price_limit_status_0','price_limit_status_1'])\n","type":"Literal","bound_global_parameter":null},{"name":"before_trading_start","value":"def bigquant_run(context, data): \n# 获取涨跌停状态数据\n df_price_limit_status=context.status_df.set_index('date')\n today=data.current_dt.strftime('%Y-%m-%d')\n # 得到当前未完成订单\n for orders in get_open_orders().values():\n # 循环,撤销订单\n for _order in orders:\n ins=str(_order.sid.symbol)\n try:\n #判断一下如果当日涨停,则取消卖单\n if df_price_limit_status[df_price_limit_status.instrument==ins].price_limit_status_0.loc[today]>2 and _order.amount<0:\n cancel_order(_order)\n print(today,'尾盘涨停取消卖单',ins) \n except:\n continue\n\n ","type":"Literal","bound_global_parameter":null},{"name":"volume_limit","value":0.025,"type":"Literal","bound_global_parameter":null},{"name":"order_price_field_buy","value":"open","type":"Literal","bound_global_parameter":null},{"name":"order_price_field_sell","value":"close","type":"Literal","bound_global_parameter":null},{"name":"capital_base","value":"1000000","type":"Literal","bound_global_parameter":null},{"name":"auto_cancel_non_tradable_orders","value":"True","type":"Literal","bound_global_parameter":null},{"name":"data_frequency","value":"daily","type":"Literal","bound_global_parameter":null},{"name":"price_type","value":"真实价格","type":"Literal","bound_global_parameter":null},{"name":"product_type","value":"股票","type":"Literal","bound_global_parameter":null},{"name":"plot_charts","value":"True","type":"Literal","bound_global_parameter":null},{"name":"backtest_only","value":"False","type":"Literal","bound_global_parameter":null},{"name":"benchmark","value":"000300.HIX","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-114"},{"name":"options_data","node_id":"-114"},{"name":"history_ds","node_id":"-114"},{"name":"benchmark_ds","node_id":"-114"},{"name":"tr
ading_calendar","node_id":"-114"}],"output_ports":[{"name":"raw_perf","node_id":"-114"}],"cacheable":false,"seq_num":12,"comment":"","comment_collapsed":true,"x":-206,"y":445},{"node_id":"-113","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n),\n\nsql3 AS (SELECT * FROM sql1 JOIN sql2 USING (date, instrument))\nSELECT\n\t*,\n -- 计算的是未来5日的收益率。这是通过将5天后的收盘价除以第二天的开盘价得到的。这里使用的是一个叫做m_lead的函数,它可以获取某个字段在未来某天的值。\n -- 计算未来5日收益率的1%分位数。all_quantile_cont是一个分位数函数,它能够计算出某个字段值的分位数,这里是计算1%的分位数。\n c_quantile_cont(future_return, 0.01) AS _future_return_1pct,\n\n -- 计算未来5日收益率的99%分位数。同样,all_quantile_cont函数用来计算99%的分位数。\n c_quantile_cont(future_return, 0.99) AS _future_return_99pct,\n\n -- 对未来5日收益率进行截断处理,值位于1%和99%分位数之间的数据被保留,超过这个范围的值将被设为边界值。\n clip(future_return, _future_return_1pct, _future_return_99pct) AS _clipped_return,\n\n -- 将截断后的未来5日收益率分为20个不同的桶,每个桶代表一个范围。all_wbins函数用于将数据离散化为多个桶。\n cbins(_clipped_return, 20) AS _binned_return,\n\n -- 将离散化后的数据作为标签使用,这是我们预测的目标。\n _binned_return AS label\n-- 从cn_stock_bar1d这个表中选择数据,这个表存储的是股票的日线数据\nFROM sql3\n-- QUALIFY 用于数据过滤 \nQUALIFY\n COLUMNS(*) IS NOT NULL\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-113"},{"name":"sql2","node_id":"-113"}],"output_ports":[{"name":"data","node_id":"-113"}],"cacheable":true,"seq_num":13,"comment":"3. 
合并因子和标注数据","comment_collapsed":true,"x":-584.9536437988281,"y":-328.8037109375},{"node_id":"-121","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n)\n\nSELECT * FROM sql1 JOIN sql2 USING (date, instrument)\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-121"},{"name":"sql2","node_id":"-121"}],"output_ports":[{"name":"data","node_id":"-121"}],"cacheable":true,"seq_num":15,"comment":"3. 合并因子和标注数据","comment_collapsed":true,"x":39.95826721191406,"y":-349.21795654296875},{"node_id":"-124","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"SELECT\ndate,\ninstrument,\nprice_limit_status,\nm_lag(price_limit_status,2) as price_limit_status_2\nFROM cn_stock_status\nORDER BY\ndate, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-124"}],"cacheable":true,"seq_num":16,"comment":"2. 
编写因子","comment_collapsed":true,"x":206,"y":-514.4729309082031},{"node_id":"-170","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n)\n\nSELECT * FROM sql1 JOIN sql2 USING (date, instrument)\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-170"},{"name":"sql2","node_id":"-170"}],"output_ports":[{"name":"data","node_id":"-170"}],"cacheable":true,"seq_num":27,"comment":"","comment_collapsed":true,"x":115.54866027832031,"y":-196.88255310058594},{"node_id":"-128","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考\n-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B\nSELECT * FROM(\nSELECT\n\n -- 在这里输入因子表达式\n -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0\n -- 数据&字段: 数据文档 https://bigquant.com/data/home\n (m_lag(close, 1) + m_lag(close, 2))/2 AS avg_close_2,\n close,\n \topen,\n \thigh,\n \tlow,\n -- 日期和股票代码\n date, instrument\n\n-- 预计算因子 cn_stock_bar1d https://bigquant.com/data/datasources/cn_stock_bar1d\nFROM cn_stock_bar1d\n\nWHERE\n -- 只选取在指定日期范围内的数据,这里多看90天,用于有的算子需要更早的数据\n date BETWEEN DATE '{start_date}' - INTERVAL 90 DAY AND '{end_date}'\n -- 去掉有空值的行\n AND COLUMNS(*) IS NOT NULL\n-- 窗口函数内容过滤需要放在 QUALIFY 这里\nQUALIFY\n -- 去掉有空值的行,如果用到窗口函数(比如 m_lag),这里需要删除,不然会报错\n COLUMNS(*) IS NOT NULL\n\n-- 按日期和股票代码排序,从小到大\nORDER BY date, instrument)\nWHERE\n\t(close/avg_close_2 -1)>0.1\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-128"}],"cacheable":true,"seq_num":18,"comment":"2. 
编写因子","comment_collapsed":true,"x":387.6568603515625,"y":-380.5733947753906},{"node_id":"-130","module_id":"BigQuantSpace.input_features_dai.input_features_dai-v6","parameters":[{"name":"sql","value":"-- 使用DAI SQL获取数据,构建因子等,如下是一个例子作为参考\n-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B\nSELECT * FROM(\nSELECT\n -- 在这里输入因子表达式\n -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0\n -- 数据&字段: 数据文档 https://bigquant.com/data/home\n (m_lag(close, 1) + m_lag(close, 2))/2 AS avg_close_2,\n close,\n \topen,\n \thigh,\n \tlow,\n -- 日期和股票代码\n date, instrument\n\n-- 预计算因子 cn_stock_bar1d https://bigquant.com/data/datasources/cn_stock_bar1d\nFROM cn_stock_bar1d\n\nWHERE\n -- 只选取在指定日期范围内的数据,这里多看90天,用于有的算子需要更早的数据\n date BETWEEN DATE '{start_date}' - INTERVAL 90 DAY AND '{end_date}'\n -- 去掉有空值的行\n AND COLUMNS(*) IS NOT NULL\n-- 窗口函数内容过滤需要放在 QUALIFY 这里\nQUALIFY\n -- 去掉有空值的行,如果用到窗口函数(比如 m_lag),这里需要删除,不然会报错\n COLUMNS(*) IS NOT NULL\n\n-- 按日期和股票代码排序,从小到大\nORDER BY date, instrument)\nWHERE\n\t(close/avg_close_2 -1)>0.1\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[],"output_ports":[{"name":"data","node_id":"-130"}],"cacheable":true,"seq_num":20,"comment":"","comment_collapsed":true,"x":-514,"y":-754},{"node_id":"-106","module_id":"BigQuantSpace.sql_join_2.sql_join_2-v1","parameters":[{"name":"sql_join","value":"WITH\nsql1 AS (\n {sql1}\n),\nsql2 AS (\n {sql2}\n)\n\nSELECT * FROM sql1 JOIN sql2 USING (date, instrument)\nORDER BY date, instrument\n","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"sql1","node_id":"-106"},{"name":"sql2","node_id":"-106"}],"output_ports":[{"name":"data","node_id":"-106"}],"cacheable":true,"seq_num":3,"comment":"3. 
合并因子和标注数据","comment_collapsed":false,"x":-722,"y":-578}],"node_layout":"<node_postions><node_position Node='-115' Position='-887.5857238769531,-729.8284912109375,200,200'/><node_position Node='-118' Position='-218.2720184326172,-623.1422119140625,200,200'/><node_position Node='-110' Position='-258.94126892089844,219.88714599609375,200,200'/><node_position Node='-127' Position='-500.14678955078125,-189.39261627197266,200,200'/><node_position Node='-75' Position='-357.7325744628906,94.28900909423828,200,200'/><node_position Node='-88' Position='7.309146404266357,20.64450454711914,200,200'/><node_position Node='-79' Position='-481.7403564453125,-20.752729415893555,200,200'/><node_position Node='-96' Position='34.88255310058594,-87.28439712524414,200,200'/><node_position Node='-99' Position='-249,339,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-62' Position='-30,-715,200,200'/><node_position Node='-114' Position='-206,445,200,200'/><node_position Node='-113' Position='-584.9536437988281,-328.8037109375,200,200'/><node_position Node='-121' Position='39.95826721191406,-349.21795654296875,200,200'/><node_position Node='-124' Position='206,-514.4729309082031,200,200'/><node_position Node='-170' Position='115.54866027832031,-196.88255310058594,200,200'/><node_position Node='-128' Position='387.6568603515625,-380.5733947753906,200,200'/><node_position Node='-130' Position='-514,-754,200,200'/><node_position Node='-106' Position='-722,-578,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
[2023-12-20 14:27:36.782592] INFO: moduleinvoker:3459509417.py:140: input_features_dai.v6 开始运行..
[2023-12-20 14:27:36.955463] INFO: moduleinvoker:3459509417.py:140: 命中缓存
[2023-12-20 14:27:36.960257] INFO: moduleinvoker:3459509417.py:140: input_features_dai.v6 运行完成[0.177726s].
[2023-12-20 14:27:37.182613] INFO: moduleinvoker:3459509417.py:169: input_features_dai.v6 开始运行..
[2023-12-20 14:27:37.320421] INFO: moduleinvoker:3459509417.py:169: 命中缓存
[2023-12-20 14:27:37.325835] INFO: moduleinvoker:3459509417.py:169: input_features_dai.v6 运行完成[0.143235s].
[2023-12-20 14:27:37.444476] INFO: moduleinvoker:3459509417.py:225: instruments.v2 开始运行..
[2023-12-20 14:27:37.454895] INFO: moduleinvoker:3459509417.py:225: 命中缓存
[2023-12-20 14:27:37.459745] INFO: moduleinvoker:3459509417.py:225: instruments.v2 运行完成[0.015307s].
[2023-12-20 14:27:37.694260] INFO: moduleinvoker:3459509417.py:234: input_features_dai.v6 开始运行..
[2023-12-20 14:27:37.786471] INFO: moduleinvoker:3459509417.py:234: 命中缓存
[2023-12-20 14:27:37.792462] INFO: moduleinvoker:3459509417.py:234: input_features_dai.v6 运行完成[0.098231s].
[2023-12-20 14:27:37.995005] INFO: moduleinvoker:3459509417.py:247: sql_join_2.v1 开始运行..
[2023-12-20 14:27:38.086420] INFO: moduleinvoker:3459509417.py:247: 命中缓存
[2023-12-20 14:27:38.091322] INFO: moduleinvoker:3459509417.py:247: sql_join_2.v1 运行完成[0.096345s].
[2023-12-20 14:27:38.394258] INFO: moduleinvoker:3459509417.py:264: input_features_dai.v6 开始运行..
[2023-12-20 14:27:38.527844] INFO: moduleinvoker:3459509417.py:264: 命中缓存
[2023-12-20 14:27:38.532736] INFO: moduleinvoker:3459509417.py:264: input_features_dai.v6 运行完成[0.138517s].
[2023-12-20 14:27:38.654555] INFO: moduleinvoker:3459509417.py:303: sql_join_2.v1 开始运行..
[2023-12-20 14:27:38.797829] INFO: moduleinvoker:3459509417.py:303: 命中缓存
[2023-12-20 14:27:38.803030] INFO: moduleinvoker:3459509417.py:303: sql_join_2.v1 运行完成[0.148443s].
[2023-12-20 14:27:39.102439] INFO: moduleinvoker:3459509417.py:319: extract_data_dai.v7 开始运行..
[2023-12-20 14:27:39.247015] INFO: moduleinvoker:3459509417.py:319: 命中缓存
[2023-12-20 14:27:39.252111] INFO: moduleinvoker:3459509417.py:319: extract_data_dai.v7 运行完成[0.149685s].
[2023-12-20 14:27:39.525789] INFO: moduleinvoker:3459509417.py:330: cached.v3 开始运行..
[2023-12-20 14:27:39.536670] INFO: moduleinvoker:3459509417.py:330: 命中缓存
[2023-12-20 14:27:39.541325] INFO: moduleinvoker:3459509417.py:330: cached.v3 运行完成[0.015486s].
[2023-12-20 14:27:39.833393] INFO: moduleinvoker:3459509417.py:340: input_features_dai.v6 开始运行..
[2023-12-20 14:27:39.990476] INFO: moduleinvoker:3459509417.py:340: 命中缓存
[2023-12-20 14:27:39.995773] INFO: moduleinvoker:3459509417.py:340: input_features_dai.v6 运行完成[0.162336s].
[2023-12-20 14:27:40.163372] INFO: moduleinvoker:3459509417.py:378: sql_join_2.v1 开始运行..
[2023-12-20 14:27:40.296084] INFO: moduleinvoker:3459509417.py:378: 命中缓存
[2023-12-20 14:27:40.301479] INFO: moduleinvoker:3459509417.py:378: sql_join_2.v1 运行完成[0.138148s].
[2023-12-20 14:27:40.552727] INFO: moduleinvoker:3459509417.py:395: sql_join_2.v1 开始运行..
[2023-12-20 14:27:41.101203] INFO: moduleinvoker:3459509417.py:395: sql_join_2.v1 运行完成[0.548462s].
[2023-12-20 14:27:41.393238] INFO: moduleinvoker:3459509417.py:434: extract_data_dai.v7 开始运行..
2023-12-20 14:27:41 [info ] start_date='2016-01-01', end_date='2022-12-31', query_start_date='2015-10-03' ..
2023-12-20 14:30:21 [info ] data extracted: (66561, 29)
[2023-12-20 14:30:22.249736] INFO: moduleinvoker:3459509417.py:434: extract_data_dai.v7 运行完成[160.8565s].
[2023-12-20 14:30:22.409286] INFO: moduleinvoker:3459509417.py:445: cached.v3 开始运行..
[2023-12-20 14:30:23.001375] INFO: moduleinvoker:3459509417.py:445: cached.v3 运行完成[0.59213s].
[2023-12-20 14:30:23.180256] INFO: moduleinvoker:3459509417.py:455: stock_ranker_dai_train.v2 开始运行..
2023-12-20 14:30:23 [info ] data loaded: train=(63046, 23), validation=None
2023-12-20 14:30:23 [info ] 数据预处理 ..
2023-12-20 14:30:23 [info ] 特征预处理 ..
2023-12-20 14:30:23 [info ] prepare data: training ..
[2023-12-20 14:30:25.618099] ERROR: moduleinvoker:3459509417.py:455: module name: stock_ranker_dai_train, module version: v2, trackeback: IndexError: list index out of range
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
Cell In[10], line 455
445 m9 = M.cached.v3(
446 input_1=m4.data,
447 run=m9_run_bigquant_run,
(...)
451 output_ports=''
452 )
454 # @module(position="-357.7325744628906,94.28900909423828", comment='5. 使用StockRanker算法训练', comment_collapsed=False)
--> 455 m5 = M.stock_ranker_dai_train.v2(
456 data=m9.data_1,
457 learning_algorithm='排序',
458 number_of_leaves=30,
459 min_docs_per_leaf=1000,
460 number_of_trees=20,
461 learning_rate=0.1,
462 max_bins=1023,
463 feature_fraction=1,
464 data_row_fraction=1,
465 plot_charts=True,
466 ndcg_discount_base=1
467 )
469 # @module(position="-258.94126892089844,219.88714599609375", comment='7. 预测', comment_collapsed=False)
470 m14 = M.stock_ranker_dai_predict.v3(
471 model=m5.model,
472 data=m7.data_1
473 )
File module2/common/modulemanagerv2.py:88, in biglearning.module2.common.modulemanagerv2.BigQuantModuleVersion.__call__()
File module2/common/moduleinvoker.py:370, in biglearning.module2.common.moduleinvoker.module_invoke()
File module2/common/moduleinvoker.py:292, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
File module2/common/moduleinvoker.py:253, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
File module2/common/moduleinvoker.py:210, in biglearning.module2.common.moduleinvoker._module_run()
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:913, in run(data, validation_ds, learning_algorithm, number_of_leaves, min_docs_per_leaf, number_of_trees, learning_rate, max_bins, feature_fraction, data_row_fraction, plot_charts, ndcg_discount_base, base_model)
910 validation_data = validation_ds.read() if validation_ds is not None else None
911 logger.info(f"data loaded: train={data.shape}, validation={validation_data.shape if validation_data is not None else None}")
--> 913 result = train(
914 features=None,
915 data=data,
916 validation_data=validation_data,
917 learning_algorithm=learning_algorithm,
918 learning_rate=learning_rate,
919 max_bins=max_bins,
920 number_of_trees=number_of_trees,
921 number_of_leaves=number_of_leaves,
922 min_docs_per_leaf=min_docs_per_leaf,
923 feature_fraction=feature_fraction,
924 data_row_fraction=data_row_fraction,
925 ndcg_discount_base=ndcg_discount_base,
926 base_model_id=None,
927 # fai_cluster=None,
928 )
930 return Outputs(
931 model=dai.DataSource.write_text(json.dumps(result["model"])),
932 feature_gains=dai.DataSource.write_bdb(result["feature_gains"]),
(...)
935 plot_charts=plot_charts,
936 )
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:876, in train(features, data, validation_data, learning_algorithm, learning_rate, max_bins, number_of_trees, number_of_leaves, min_docs_per_leaf, feature_fraction, data_row_fraction, ndcg_discount_base, base_model_id, fai_cluster, **kwargs)
874 result = fai.get(fai.remote(_train).remote(**kwargs))
875 else:
--> 876 result = _train(**kwargs)
877 return result
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:691, in train.<locals>._train(features, data, validation_data, learning_algorithm, learning_rate, max_bins, number_of_trees, number_of_leaves, min_docs_per_leaf, feature_fraction, data_row_fraction, ndcg_discount_base, base_model_id)
689 # TODO: 优化日志
690 logger.info("数据预处理 ..")
--> 691 bin_data = df_to_bin(
692 working_root=working_root,
693 dcg_trunc=10,
694 na_label=None,
695 sort=True,
696 bins="training",
697 max_bins=max_bins,
698 selected_columns=feature_names,
699 training=data,
700 validation=validation_data,
701 )
702 logger.info(f"训练集预处理完成 rows={bin_data.training_rows}, cols={bin_data.training_cols}")
703 if validation_data is not None:
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:568, in train.<locals>.df_to_bin(working_root, dcg_trunc, na_label, bins, max_bins, selected_columns, sort, **kwargs)
566 output["%s_rows" % name] = len(df)
567 output["%s_cols" % name] = len(df.columns)
--> 568 output["%s_bin_path" % name] = __convert_to_bin(f"{working_root}/{name}.bin", df, rating_to_label, label_gains, dcg_trunc)
570 return Outputs(**output)
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:480, in train.<locals>.__convert_to_bin(output_path, df, rating_to_label, label_gains, dcg_trunc)
479 def __convert_to_bin(output_path, df, rating_to_label, label_gains, dcg_trunc):
--> 480 dataframe_to_bin(
481 df,
482 output_path,
483 rating_to_label=rating_to_label and dict(rating_to_label),
484 label_gains=label_gains,
485 dcg_trunc=dcg_trunc,
486 )
487 return output_path
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:465, in train.<locals>.dataframe_to_bin(df, output_bin_path, rating_to_label, label_gains, dcg_trunc, label_column)
454 def dataframe_to_bin(df, output_bin_path, rating_to_label=None, label_gains=None, dcg_trunc=10, label_column="label"):
455 """转化为stockranker训练数据格式
456
457 Args:
(...)
463 label_column: TODO
464 """
--> 465 return DataFrame2Bin(rating_to_label, label_gains, dcg_trunc, label_column).convert(df, output_bin_path)
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:425, in train.<locals>.DataFrame2Bin.convert(self, df, output_bin_path)
422 self.__writer = output_bin_path
423 self.__require_close = False
--> 425 self.__calculate()
427 self.__writer.seek(self.__header_size, SEEK_CUR)
428 self.__write_dataset_skeleton()
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:328, in train.<locals>.DataFrame2Bin.__calculate(self)
326 self.__query_ids = self.__df["m:QueryId"].drop_duplicates()
327 self.__boundaries = list(self.__query_ids.index) + [self.__num_docs]
--> 328 self.__max_dcg = self.__max_dcg_range()
330 self.__doc_to_query = [0] * len(self.__doc_ids)
331 for q in range(0, len(self.__query_ids)):
File /var/app/data/custom_module/publicmodules/stock_ranker_dai_train/v2/__init__.py:300, in train.<locals>.DataFrame2Bin.__max_dcg_range(self)
298 label_counts[i] = 0
299 for k in range(self.__boundaries[q], self.__boundaries[q + 1]):
--> 300 label = self.__labels[k]
301 label_counts[label] += 1
303 top_label = relevancy_level - 1
IndexError: list index out of range