TabNet Quantitative Stock Selection Strategy

Strategy Idea

Based on TabNet's predictions, the strategy buys the 20 top-ranked stocks and holds them, rebalancing every 5 trading days (see the sketch below).
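As a rough, platform-independent illustration (a minimal pandas sketch, not the BigQuant API generated below), the selection rule amounts to ranking each day's TabNet scores and keeping the top 20 on every 5th trading day. The column names `date`, `instrument`, and `pred_label` are assumed to match the prediction output built later in the pipeline.

    import pandas as pd

    def select_top20(predictions: pd.DataFrame, hold_days: int = 5, top_n: int = 20) -> pd.DataFrame:
        """Illustrative sketch only: on every hold_days-th trading date, keep the
        top_n instruments with the highest predicted return (pred_label)."""
        dates = sorted(predictions['date'].unique())
        picks = []
        for d in dates[::hold_days]:                      # rebalance every hold_days trading days
            day = predictions[predictions['date'] == d]
            top = day.nlargest(top_n, 'pred_label')       # best TabNet scores first
            picks.append(top.assign(rebalance_date=d))
        return pd.concat(picks, ignore_index=True)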

    {"description":"实验创建于2017/8/26","graph":{"edges":[{"to_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"to_node_id":"-106:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8:data"},{"to_node_id":"-276:input_1","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15:data"},{"to_node_id":"-106:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-113:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-122:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-129:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-243:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-251:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-266:input_2","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-288:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-298:input_2","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-293:features","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24:data"},{"to_node_id":"-243:input_data","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data"},{"to_node_id":"-122:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62:data"},{"to_node_id":"-141:instruments","from_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62:data"},{"to_node_id":"-113:input_data","from_node_id":"-106:data"},{"to_node_id":"-266:input_1","from_node_id":"-113:data"},{"to_node_id":"-129:input_data","from_node_id":"-122:data"},{"to_node_id":"-2431:input_2","from_node_id":"-129:data"},{"to_node_id":"-298:input_1","from_node_id":"-129:data"},{"to_node_id":"-682:inputs","from_node_id":"-160:data"},{"to_node_id":"-18019:input1","from_node_id":"-160:data"},{"to_node_id":"-682:outputs","from_node_id":"-238:data"},{"to_node_id":"-1098:input_model","from_node_id":"-682:data"},{"to_node_id":"-1540:trained_model","from_node_id":"-1098:data"},{"to_node_id":"-2431:input_1","from_node_id":"-1540:data"},{"to_node_id":"-141:options_data","from_node_id":"-2431:data_1"},{"to_node_id":"-436:input_1","from_node_id":"-243:data"},{"to_node_id":"-1540:input_data","from_node_id":"-251:data"},{"to_node_id":"-1098:training_data","from_node_id":"-436:data_1"},{"to_node_id":"-1098:validation_data","from_node_id":"-436:data_2"},{"to_node_id":"-288:input_data","from_node_id":"-266:data"},{"to_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data2","from_node_id":"-288:data"},{"to_node_id":"-251:input_data","from_node_id":"-293:data"},{"to_node_id":"-293:input_data","from_node_id":"-298:data"},{"to_node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53:data1","from_node_id":"-276:data"},{"to_node_id":"-238:inputs","from_node_id":"-18019:data"}],"nodes":[{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2014-01-01","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"2017-12-31","type":"Literal","bound_global_parameter":null},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"
rolling_conf","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-8"}],"cacheable":true,"seq_num":1,"comment":"","comment_collapsed":true},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15","module_id":"BigQuantSpace.advanced_auto_labeler.advanced_auto_labeler-v2","parameters":[{"name":"label_expr","value":"# #号开始的表示注释\n# 0. 每行一个,顺序执行,从第二个开始,可以使用label字段\n# 1. 可用数据字段见 https://bigquant.com/docs/data_history_data.html\n# 添加benchmark_前缀,可使用对应的benchmark数据\n# 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/big_expr.html>`_\n\n# 计算收益:5日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)\nshift(close, -5) / shift(open, -1)-1\n\n# 极值处理:用1%和99%分位的值做clip\nclip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))\n\n# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)\nwhere(shift(high, -1) == shift(low, -1), NaN, label)\n","type":"Literal","bound_global_parameter":null},{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"benchmark","value":"000300.SHA","type":"Literal","bound_global_parameter":null},{"name":"drop_na_label","value":"True","type":"Literal","bound_global_parameter":null},{"name":"cast_label_int","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-15"}],"cacheable":true,"seq_num":2,"comment":"","comment_collapsed":true},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24","module_id":"BigQuantSpace.input_features.input_features-v1","parameters":[{"name":"features","value":"close_0\nopen_0\nhigh_0\nlow_0 \namount_0\nturn_0 \nreturn_0\n \nclose_1\nopen_1\nhigh_1\nlow_1\nreturn_1\namount_1\nturn_1\n \nclose_2\nopen_2\nhigh_2\nlow_2\namount_2\nturn_2\nreturn_2\n \nclose_3\nopen_3\nhigh_3\nlow_3\namount_3\nturn_3\nreturn_3\n \nclose_4\nopen_4\nhigh_4\nlow_4\namount_4\nturn_4\nreturn_4\n \nmean(close_0, 5)\nmean(low_0, 5)\nmean(open_0, 5)\nmean(high_0, 5)\nmean(turn_0, 5)\nmean(amount_0, 5)\nmean(return_0, 5)\n \nts_max(close_0, 5)\nts_max(low_0, 5)\nts_max(open_0, 5)\nts_max(high_0, 5)\nts_max(turn_0, 5)\nts_max(amount_0, 5)\nts_max(return_0, 5)\n \nts_min(close_0, 5)\nts_min(low_0, 5)\nts_min(open_0, 5)\nts_min(high_0, 5)\nts_min(turn_0, 5)\nts_min(amount_0, 5)\nts_min(return_0, 5) \n \nstd(close_0, 5)\nstd(low_0, 5)\nstd(open_0, 5)\nstd(high_0, 5)\nstd(turn_0, 5)\nstd(amount_0, 5)\nstd(return_0, 5)\n \nts_rank(close_0, 5)\nts_rank(low_0, 5)\nts_rank(open_0, 5)\nts_rank(high_0, 5)\nts_rank(turn_0, 5)\nts_rank(amount_0, 5)\nts_rank(return_0, 5)\n \ndecay_linear(close_0, 5)\ndecay_linear(low_0, 5)\ndecay_linear(open_0, 5)\ndecay_linear(high_0, 5)\ndecay_linear(turn_0, 5)\ndecay_linear(amount_0, 5)\ndecay_linear(return_0, 5)\n \ncorrelation(volume_0, return_0, 5)\ncorrelation(volume_0, high_0, 5)\ncorrelation(volume_0, low_0, 5)\ncorrelation(volume_0, close_0, 5)\ncorrelation(volume_0, open_0, 5)\ncorrelation(volume_0, turn_0, 5)\n \ncorrelation(return_0, high_0, 5)\ncorrelation(return_0, low_0, 5)\ncorrelation(return_0, close_0, 5)\ncorrelation(return_0, open_0, 5)\ncorrelation(return_0, turn_0, 5)\n \ncorrelation(high_0, low_0, 5)\ncorrelation(high_0, close_0, 5)\ncorrelation(high_0, open_0, 5)\ncorrelation(high_0, turn_0, 5)\n \ncorrelation(low_0, close_0, 
5)\ncorrelation(low_0, open_0, 5)\ncorrelation(low_0, turn_0, 5)\n \ncorrelation(close_0, open_0, 5)\ncorrelation(close_0, turn_0, 5)\n\ncorrelation(open_0, turn_0, 5)","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"features_ds","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-24"}],"cacheable":true,"seq_num":3,"comment":"","comment_collapsed":true},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53","module_id":"BigQuantSpace.join.join-v3","parameters":[{"name":"on","value":"date,instrument","type":"Literal","bound_global_parameter":null},{"name":"how","value":"inner","type":"Literal","bound_global_parameter":null},{"name":"sort","value":"False","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"data1","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"},{"name":"data2","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-53"}],"cacheable":true,"seq_num":7,"comment":"","comment_collapsed":true},{"node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62","module_id":"BigQuantSpace.instruments.instruments-v2","parameters":[{"name":"start_date","value":"2018-01-01","type":"Literal","bound_global_parameter":"交易日期"},{"name":"end_date","value":"2022-08-10","type":"Literal","bound_global_parameter":"交易日期"},{"name":"market","value":"CN_STOCK_A","type":"Literal","bound_global_parameter":null},{"name":"instrument_list","value":"000837.SZA\n002885.SZA\n002665.SZA\n002204.SZA\n000899.SZA\n600647.SHA\n600758.SHA\n600757.SHA\n600756.SHA\n600755.SHA\n000001.SZA\n600750.SHA\n600749.SHA\n600748.SHA\n600746.SHA\n600745.SHA\n600744.SHA\n600754.SHA\n600751.SHA\n600760.SHA\n000661.SZA\n003021.SZA\n600580.SHA\n605358.SHA\n002229.SZA\n002231.SZA\n603929.SHA\n688002.SHA\n600641.SHA\n002119.SZA\n688699.SHA\n603823.SHA\n688083.SHA\n002987.SZA\n001270.SZA\n002344.SZA\n002346.SZA\n603726.SHA\n002276.SZA\n002156.SZA\n","type":"Literal","bound_global_parameter":null},{"name":"max_count","value":"0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"rolling_conf","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62"}],"output_ports":[{"name":"data","node_id":"287d2cb0-f53c-4101-bdf8-104b137c8601-62"}],"cacheable":true,"seq_num":9,"comment":"预测数据,用于回测和模拟","comment_collapsed":true},{"node_id":"-106","module_id":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-106"},{"name":"features","node_id":"-106"}],"output_ports":[{"name":"data","node_id":"-106"}],"cacheable":true,"seq_num":15,"comment":"","comment_collapsed":true},{"node_id":"-113","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"True","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{
"name":"input_data","node_id":"-113"},{"name":"features","node_id":"-113"}],"output_ports":[{"name":"data","node_id":"-113"}],"cacheable":true,"seq_num":16,"comment":"","comment_collapsed":true},{"node_id":"-122","module_id":"BigQuantSpace.general_feature_extractor.general_feature_extractor-v7","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"before_start_days","value":0,"type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-122"},{"name":"features","node_id":"-122"}],"output_ports":[{"name":"data","node_id":"-122"}],"cacheable":true,"seq_num":17,"comment":"","comment_collapsed":true},{"node_id":"-129","module_id":"BigQuantSpace.derived_feature_extractor.derived_feature_extractor-v3","parameters":[{"name":"date_col","value":"date","type":"Literal","bound_global_parameter":null},{"name":"instrument_col","value":"instrument","type":"Literal","bound_global_parameter":null},{"name":"drop_na","value":"True","type":"Literal","bound_global_parameter":null},{"name":"remove_extra_columns","value":"False","type":"Literal","bound_global_parameter":null},{"name":"user_functions","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-129"},{"name":"features","node_id":"-129"}],"output_ports":[{"name":"data","node_id":"-129"}],"cacheable":true,"seq_num":18,"comment":"","comment_collapsed":true},{"node_id":"-141","module_id":"BigQuantSpace.trade.trade-v4","parameters":[{"name":"start_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"end_date","value":"","type":"Literal","bound_global_parameter":null},{"name":"initialize","value":"# 回测引擎:初始化函数,只执行一次\ndef bigquant_run(context):\n # 加载预测数据\n context.ranker_prediction = context.options['data'].read_df()\n\n # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数\n context.set_commission(PerOrder(buy_cost=0.001, sell_cost=0.001, min_cost=5))\n # 预测数据,通过options传入进来,使用 read_df 函数,加载到内存 (DataFrame)\n # 设置买入的股票数量,这里买入预测股票列表排名靠前的5只\n stock_count = 20\n # 每只的股票的权重,如下的权重分配会使得靠前的股票分配多一点的资金,[0.339160, 0.213986, 0.169580, ..]\n context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])\n # 设置每只股票占用的最大资金比例\n context.max_cash_per_instrument = 0.2\n context.options['hold_days'] = 5\n","type":"Literal","bound_global_parameter":null},{"name":"handle_data","value":"# 回测引擎:每日数据处理函数,每天执行一次\ndef bigquant_run(context, data):\n # 按日期过滤得到今日的预测数据\n ranker_prediction = context.ranker_prediction[\n context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]\n\n # 1. 资金分配\n # 平均持仓时间是hold_days,每日都将买入股票,每日预期使用 1/hold_days 的资金\n # 实际操作中,会存在一定的买入误差,所以在前hold_days天,等量使用资金;之后,尽量使用剩余资金(这里设置最多用等量的1.5倍)\n is_staging = context.trading_day_index < context.options['hold_days'] # 是否在建仓期间(前 hold_days 天)\n cash_avg = context.portfolio.portfolio_value / context.options['hold_days']\n cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)\n cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)\n positions = {e.symbol: p.amount * p.last_sale_price\n for e, p in context.perf_tracker.position_tracker.positions.items()}\n\n # 2. 
生成卖出订单:hold_days天之后才开始卖出;对持仓的股票,按机器学习算法预测的排序末位淘汰\n if not is_staging and cash_for_sell > 0:\n equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}\n instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(\n lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))\n # print('rank order for sell %s' % instruments)\n for instrument in instruments:\n context.order_target(context.symbol(instrument), 0)\n cash_for_sell -= positions[instrument]\n if cash_for_sell <= 0:\n break\n\n # 3. 生成买入订单:按机器学习算法预测的排序,买入前面的stock_count只股票\n buy_cash_weights = context.stock_weights\n buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])\n max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument\n for i, instrument in enumerate(buy_instruments):\n cash = cash_for_buy * buy_cash_weights[i]\n if cash > max_cash_per_instrument - positions.get(instrument, 0):\n # 确保股票持仓量不会超过每次股票最大的占用资金量\n cash = max_cash_per_instrument - positions.get(instrument, 0)\n if cash > 0:\n context.order_value(context.symbol(instrument), cash)\n","type":"Literal","bound_global_parameter":null},{"name":"prepare","value":"# 回测引擎:准备数据,只执行一次\ndef bigquant_run(context):\n pass\n","type":"Literal","bound_global_parameter":null},{"name":"before_trading_start","value":"","type":"Literal","bound_global_parameter":null},{"name":"volume_limit","value":0.025,"type":"Literal","bound_global_parameter":null},{"name":"order_price_field_buy","value":"open","type":"Literal","bound_global_parameter":null},{"name":"order_price_field_sell","value":"close","type":"Literal","bound_global_parameter":null},{"name":"capital_base","value":1000000,"type":"Literal","bound_global_parameter":null},{"name":"auto_cancel_non_tradable_orders","value":"True","type":"Literal","bound_global_parameter":null},{"name":"data_frequency","value":"daily","type":"Literal","bound_global_parameter":null},{"name":"price_type","value":"后复权","type":"Literal","bound_global_parameter":null},{"name":"product_type","value":"股票","type":"Literal","bound_global_parameter":null},{"name":"plot_charts","value":"True","type":"Literal","bound_global_parameter":null},{"name":"backtest_only","value":"False","type":"Literal","bound_global_parameter":null},{"name":"benchmark","value":"000300.SHA","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"instruments","node_id":"-141"},{"name":"options_data","node_id":"-141"},{"name":"history_ds","node_id":"-141"},{"name":"benchmark_ds","node_id":"-141"},{"name":"trading_calendar","node_id":"-141"}],"output_ports":[{"name":"raw_perf","node_id":"-141"}],"cacheable":false,"seq_num":19,"comment":"","comment_collapsed":true},{"node_id":"-160","module_id":"BigQuantSpace.dl_layer_input.dl_layer_input-v1","parameters":[{"name":"shape","value":"98","type":"Literal","bound_global_parameter":null},{"name":"batch_shape","value":"","type":"Literal","bound_global_parameter":null},{"name":"dtype","value":"float32","type":"Literal","bound_global_parameter":null},{"name":"sparse","value":"False","type":"Literal","bound_global_parameter":null},{"name":"name","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"inputs","node_id":"-160"}],"output_ports":[{"name":"data","node_id":"-160"}],"cacheable":false,"seq_num":6,"comment":"","comment_collapsed":true},{"node_id":"-238","module_id":"BigQuantSpace.dl_layer_dense.dl_layer_dense-v1","parameters":[{"name":"units","val
ue":"1","type":"Literal","bound_global_parameter":null},{"name":"activation","value":"linear","type":"Literal","bound_global_parameter":null},{"name":"user_activation","value":"","type":"Literal","bound_global_parameter":null},{"name":"use_bias","value":"False","type":"Literal","bound_global_parameter":null},{"name":"kernel_initializer","value":"Zeros","type":"Literal","bound_global_parameter":null},{"name":"user_kernel_initializer","value":"","type":"Literal","bound_global_parameter":null},{"name":"bias_initializer","value":"Zeros","type":"Literal","bound_global_parameter":null},{"name":"user_bias_initializer","value":"","type":"Literal","bound_global_parameter":null},{"name":"kernel_regularizer","value":"None","type":"Literal","bound_global_parameter":null},{"name":"kernel_regularizer_l1","value":0,"type":"Literal","bound_global_parameter":null},{"name":"kernel_regularizer_l2","value":0,"type":"Literal","bound_global_parameter":null},{"name":"user_kernel_regularizer","value":"","type":"Literal","bound_global_parameter":null},{"name":"bias_regularizer","value":"None","type":"Literal","bound_global_parameter":null},{"name":"bias_regularizer_l1","value":0,"type":"Literal","bound_global_parameter":null},{"name":"bias_regularizer_l2","value":0,"type":"Literal","bound_global_parameter":null},{"name":"user_bias_regularizer","value":"","type":"Literal","bound_global_parameter":null},{"name":"activity_regularizer","value":"None","type":"Literal","bound_global_parameter":null},{"name":"activity_regularizer_l1","value":0,"type":"Literal","bound_global_parameter":null},{"name":"activity_regularizer_l2","value":0,"type":"Literal","bound_global_parameter":null},{"name":"user_activity_regularizer","value":"","type":"Literal","bound_global_parameter":null},{"name":"kernel_constraint","value":"None","type":"Literal","bound_global_parameter":null},{"name":"user_kernel_constraint","value":"","type":"Literal","bound_global_parameter":null},{"name":"bias_constraint","value":"None","type":"Literal","bound_global_parameter":null},{"name":"user_bias_constraint","value":"","type":"Literal","bound_global_parameter":null},{"name":"name","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"inputs","node_id":"-238"}],"output_ports":[{"name":"data","node_id":"-238"}],"cacheable":false,"seq_num":23,"comment":"","comment_collapsed":true},{"node_id":"-682","module_id":"BigQuantSpace.dl_model_init.dl_model_init-v1","parameters":[],"input_ports":[{"name":"inputs","node_id":"-682"},{"name":"outputs","node_id":"-682"}],"output_ports":[{"name":"data","node_id":"-682"}],"cacheable":false,"seq_num":4,"comment":"","comment_collapsed":true},{"node_id":"-1098","module_id":"BigQuantSpace.dl_model_train.dl_model_train-v1","parameters":[{"name":"optimizer","value":"自定义","type":"Literal","bound_global_parameter":null},{"name":"user_optimizer","value":"from tensorflow.keras.optimizers import Adam, schedules\n\nlr = schedules.ExponentialDecay(0.02, decay_steps=2000, decay_rate=0.9, 
staircase=False)\n\nbigquant_run=Adam(lr)","type":"Literal","bound_global_parameter":null},{"name":"loss","value":"mean_squared_error","type":"Literal","bound_global_parameter":null},{"name":"user_loss","value":"","type":"Literal","bound_global_parameter":null},{"name":"metrics","value":"mse","type":"Literal","bound_global_parameter":null},{"name":"batch_size","value":"10240","type":"Literal","bound_global_parameter":null},{"name":"epochs","value":"100","type":"Literal","bound_global_parameter":null},{"name":"earlystop","value":"from tensorflow.keras.callbacks import EarlyStopping\n\nbigquant_run=EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5)","type":"Literal","bound_global_parameter":null},{"name":"custom_objects","value":"# 用户的自定义层需要写到字典中,比如\n# {\n# \"MyLayer\": MyLayer\n# }\nbigquant_run = {\n \"GroupNormalization\": GroupNormalization,\n \"TransformBlock\": TransformBlock,\n \"TabNetEncoderLayer\": TabNetEncoderLayer\n}\n","type":"Literal","bound_global_parameter":null},{"name":"n_gpus","value":"0","type":"Literal","bound_global_parameter":null},{"name":"verbose","value":"2:每个epoch输出一行记录","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_model","node_id":"-1098"},{"name":"training_data","node_id":"-1098"},{"name":"validation_data","node_id":"-1098"}],"output_ports":[{"name":"data","node_id":"-1098"}],"cacheable":false,"seq_num":5,"comment":"","comment_collapsed":true},{"node_id":"-1540","module_id":"BigQuantSpace.dl_model_predict.dl_model_predict-v1","parameters":[{"name":"batch_size","value":"1024","type":"Literal","bound_global_parameter":null},{"name":"n_gpus","value":0,"type":"Literal","bound_global_parameter":null},{"name":"verbose","value":"2:每个epoch输出一行记录","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"trained_model","node_id":"-1540"},{"name":"input_data","node_id":"-1540"}],"output_ports":[{"name":"data","node_id":"-1540"}],"cacheable":true,"seq_num":11,"comment":"","comment_collapsed":true},{"node_id":"-2431","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n pred_label = input_1.read_pickle()\n df = input_2.read_df()\n df = pd.DataFrame({'pred_label':pred_label[:,0], 'instrument':df.instrument, 'date':df.date})\n df.sort_values(['date','pred_label'],inplace=True, ascending=[True,False])\n return Outputs(data_1=DataSource.write_df(df), data_2=None, data_3=None)\n","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return 
outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-2431"},{"name":"input_2","node_id":"-2431"},{"name":"input_3","node_id":"-2431"}],"output_ports":[{"name":"data_1","node_id":"-2431"},{"name":"data_2","node_id":"-2431"},{"name":"data_3","node_id":"-2431"}],"cacheable":true,"seq_num":24,"comment":"","comment_collapsed":true},{"node_id":"-243","module_id":"BigQuantSpace.dl_convert_to_bin.dl_convert_to_bin-v2","parameters":[{"name":"window_size","value":1,"type":"Literal","bound_global_parameter":null},{"name":"feature_clip","value":"3","type":"Literal","bound_global_parameter":null},{"name":"flatten","value":"True","type":"Literal","bound_global_parameter":null},{"name":"window_along_col","value":"instrument","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-243"},{"name":"features","node_id":"-243"}],"output_ports":[{"name":"data","node_id":"-243"}],"cacheable":true,"seq_num":26,"comment":"","comment_collapsed":true},{"node_id":"-251","module_id":"BigQuantSpace.dl_convert_to_bin.dl_convert_to_bin-v2","parameters":[{"name":"window_size","value":1,"type":"Literal","bound_global_parameter":null},{"name":"feature_clip","value":"3","type":"Literal","bound_global_parameter":null},{"name":"flatten","value":"True","type":"Literal","bound_global_parameter":null},{"name":"window_along_col","value":"instrument","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-251"},{"name":"features","node_id":"-251"}],"output_ports":[{"name":"data","node_id":"-251"}],"cacheable":true,"seq_num":27,"comment":"","comment_collapsed":true},{"node_id":"-436","module_id":"BigQuantSpace.cached.cached-v3","parameters":[{"name":"run","value":"# Python 代码入口函数,input_1/2/3 对应三个输入端,data_1/2/3 对应三个输出端\ndef bigquant_run(input_1, input_2, input_3):\n # 示例代码如下。在这里编写您的代码\n from sklearn.model_selection import train_test_split\n data = input_1.read()\n x_train, x_val, y_train, y_val = train_test_split(data[\"x\"], data['y'], test_size=0.2)\n data_1 = DataSource.write_pickle({'x': x_train, 'y': y_train})\n data_2 = DataSource.write_pickle({'x': x_val, 'y': y_val})\n return Outputs(data_1=data_1, data_2=data_2, data_3=None)\n","type":"Literal","bound_global_parameter":null},{"name":"post_run","value":"# 后处理函数,可选。输入是主函数的输出,可以在这里对数据做处理,或者返回更友好的outputs数据格式。此函数输出不会被缓存。\ndef bigquant_run(outputs):\n return 
outputs\n","type":"Literal","bound_global_parameter":null},{"name":"input_ports","value":"","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{}","type":"Literal","bound_global_parameter":null},{"name":"output_ports","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-436"},{"name":"input_2","node_id":"-436"},{"name":"input_3","node_id":"-436"}],"output_ports":[{"name":"data_1","node_id":"-436"},{"name":"data_2","node_id":"-436"},{"name":"data_3","node_id":"-436"}],"cacheable":true,"seq_num":10,"comment":"","comment_collapsed":true},{"node_id":"-266","module_id":"BigQuantSpace.standardlize.standardlize-v8","parameters":[{"name":"columns_input","value":"[]","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-266"},{"name":"input_2","node_id":"-266"}],"output_ports":[{"name":"data","node_id":"-266"}],"cacheable":true,"seq_num":28,"comment":"","comment_collapsed":true},{"node_id":"-288","module_id":"BigQuantSpace.fillnan.fillnan-v1","parameters":[{"name":"fill_value","value":"0.0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-288"},{"name":"features","node_id":"-288"}],"output_ports":[{"name":"data","node_id":"-288"}],"cacheable":true,"seq_num":13,"comment":"","comment_collapsed":true},{"node_id":"-293","module_id":"BigQuantSpace.fillnan.fillnan-v1","parameters":[{"name":"fill_value","value":"0.0","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_data","node_id":"-293"},{"name":"features","node_id":"-293"}],"output_ports":[{"name":"data","node_id":"-293"}],"cacheable":true,"seq_num":14,"comment":"","comment_collapsed":true},{"node_id":"-298","module_id":"BigQuantSpace.standardlize.standardlize-v8","parameters":[{"name":"columns_input","value":"[]","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-298"},{"name":"input_2","node_id":"-298"}],"output_ports":[{"name":"data","node_id":"-298"}],"cacheable":true,"seq_num":25,"comment":"","comment_collapsed":true},{"node_id":"-276","module_id":"BigQuantSpace.standardlize.standardlize-v8","parameters":[{"name":"columns_input","value":"label","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input_1","node_id":"-276"},{"name":"input_2","node_id":"-276"}],"output_ports":[{"name":"data","node_id":"-276"}],"cacheable":true,"seq_num":29,"comment":"","comment_collapsed":true},{"node_id":"-18019","module_id":"BigQuantSpace.dl_layer_userlayer.dl_layer_userlayer-v1","parameters":[{"name":"layer_class","value":"import tensorflow as tf\nfrom tensorflow.keras.layers import Layer\n\ndef glu(x, n_units=None):\n \"\"\"Generalized linear unit nonlinear activation.\"\"\"\n if n_units is None:\n n_units = tf.shape(x)[-1] // 2\n\n return x[..., :n_units] * tf.nn.sigmoid(x[..., n_units:])\n\n\ndef sparsemax(logits, axis):\n logits = tf.convert_to_tensor(logits, name=\"logits\")\n\n # We need its original shape for shape inference.\n shape = logits.get_shape()\n rank = shape.rank\n is_last_axis = (axis == -1) or (axis == rank - 1)\n\n if is_last_axis:\n output = _compute_2d_sparsemax(logits)\n output.set_shape(shape)\n return output\n\n # Swap logits' dimension of dim and its last dimension.\n rank_op = tf.rank(logits)\n axis_norm = axis % rank\n logits = _swap_axis(logits, axis_norm, tf.math.subtract(rank_op, 1))\n\n # Do the actual softmax on its last dimension.\n output = _compute_2d_sparsemax(logits)\n 
output = _swap_axis(output, axis_norm, tf.math.subtract(rank_op, 1))\n\n # Make shape inference work since transpose may erase its static shape.\n output.set_shape(shape)\n return output\n\n\ndef _swap_axis(logits, dim_index, last_index, **kwargs):\n return tf.transpose(\n logits,\n tf.concat(\n [\n tf.range(dim_index),\n [last_index],\n tf.range(dim_index + 1, last_index),\n [dim_index],\n ],\n 0,\n ),\n **kwargs,\n )\n\n\ndef _compute_2d_sparsemax(logits):\n \"\"\"Performs the sparsemax operation when axis=-1.\"\"\"\n shape_op = tf.shape(logits)\n obs = tf.math.reduce_prod(shape_op[:-1])\n dims = shape_op[-1]\n\n # In the paper, they call the logits z.\n # The mean(logits) can be substracted from logits to make the algorithm\n # more numerically stable. the instability in this algorithm comes mostly\n # from the z_cumsum. Substacting the mean will cause z_cumsum to be close\n # to zero. However, in practise the numerical instability issues are very\n # minor and substacting the mean causes extra issues with inf and nan\n # input.\n # Reshape to [obs, dims] as it is almost free and means the remanining\n # code doesn't need to worry about the rank.\n z = tf.reshape(logits, [obs, dims])\n\n # sort z\n z_sorted, _ = tf.nn.top_k(z, k=dims)\n\n # calculate k(z)\n z_cumsum = tf.math.cumsum(z_sorted, axis=-1)\n k = tf.range(1, tf.cast(dims, logits.dtype) + 1) #, dtype=logits.dtype)\n z_check = 1 + k * z_sorted > z_cumsum\n # because the z_check vector is always [1,1,...1,0,0,...0] finding the\n # (index + 1) of the last `1` is the same as just summing the number of 1.\n k_z = tf.math.reduce_sum(tf.cast(z_check, tf.int32), axis=-1)\n\n # calculate tau(z)\n # If there are inf values or all values are -inf, the k_z will be zero,\n # this is mathematically invalid and will also cause the gather_nd to fail.\n # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then\n # fixed later (see p_safe) by returning p = nan. 
This results in the same\n # behavior as softmax.\n k_z_safe = tf.math.maximum(k_z, 1)\n indices = tf.stack([tf.range(0, obs), tf.reshape(k_z_safe, [-1]) - 1], axis=1)\n tau_sum = tf.gather_nd(z_cumsum, indices)\n tau_z = (tau_sum - 1) / tf.cast(k_z, logits.dtype)\n\n # calculate p\n p = tf.math.maximum(tf.cast(0, logits.dtype), z - tf.expand_dims(tau_z, -1))\n # If k_z = 0 or if z = nan, then the input is invalid\n p_safe = tf.where(\n tf.expand_dims(\n tf.math.logical_or(tf.math.equal(k_z, 0), tf.math.is_nan(z_cumsum[:, -1])),\n axis=-1,\n ),\n tf.fill([obs, dims], tf.cast(float(\"nan\"), logits.dtype)),\n p,\n )\n\n # Reshape back to original size\n p_safe = tf.reshape(p_safe, shape_op)\n return p_safe\n\n\n\"\"\"\nCode replicated from https://github.com/tensorflow/addons/blob/master/tensorflow_addons/layers/normalizations.py\n\"\"\"\nclass GroupNormalization(tf.keras.layers.Layer):\n def __init__(\n self,\n groups: int = 2,\n axis: int = -1,\n epsilon: float = 1e-3,\n center: bool = True,\n scale: bool = True,\n beta_initializer=\"zeros\",\n gamma_initializer=\"ones\",\n beta_regularizer=None,\n gamma_regularizer=None,\n beta_constraint=None,\n gamma_constraint=None,\n **kwargs\n ):\n super().__init__(**kwargs)\n self.supports_masking = True\n self.groups = groups\n self.axis = axis\n self.epsilon = epsilon\n self.center = center\n self.scale = scale\n self.beta_initializer = tf.keras.initializers.get(beta_initializer)\n self.gamma_initializer = tf.keras.initializers.get(gamma_initializer)\n self.beta_regularizer = tf.keras.regularizers.get(beta_regularizer)\n self.gamma_regularizer = tf.keras.regularizers.get(gamma_regularizer)\n self.beta_constraint = tf.keras.constraints.get(beta_constraint)\n self.gamma_constraint = tf.keras.constraints.get(gamma_constraint)\n self._check_axis()\n\n def build(self, input_shape):\n\n self._check_if_input_shape_is_none(input_shape)\n self._set_number_of_groups_for_instance_norm(input_shape)\n self._check_size_of_dimensions(input_shape)\n self._create_input_spec(input_shape)\n\n self._add_gamma_weight(input_shape)\n self._add_beta_weight(input_shape)\n self.built = True\n super().build(input_shape)\n\n def call(self, inputs, training=None):\n # Training=none is just for compat with batchnorm signature call\n input_shape = tf.keras.backend.int_shape(inputs)\n tensor_input_shape = tf.shape(inputs)\n\n reshaped_inputs, group_shape = self._reshape_into_groups(\n inputs, input_shape, tensor_input_shape\n )\n\n normalized_inputs = self._apply_normalization(reshaped_inputs, input_shape)\n\n outputs = tf.reshape(normalized_inputs, tensor_input_shape)\n\n return outputs\n\n def get_config(self):\n config = {\n \"groups\": self.groups,\n \"axis\": self.axis,\n \"epsilon\": self.epsilon,\n \"center\": self.center,\n \"scale\": self.scale,\n \"beta_initializer\": tf.keras.initializers.serialize(self.beta_initializer),\n \"gamma_initializer\": tf.keras.initializers.serialize(\n self.gamma_initializer\n ),\n \"beta_regularizer\": tf.keras.regularizers.serialize(self.beta_regularizer),\n \"gamma_regularizer\": tf.keras.regularizers.serialize(\n self.gamma_regularizer\n ),\n \"beta_constraint\": tf.keras.constraints.serialize(self.beta_constraint),\n \"gamma_constraint\": tf.keras.constraints.serialize(self.gamma_constraint),\n }\n base_config = super().get_config()\n return {**base_config, **config}\n\n def compute_output_shape(self, input_shape):\n return input_shape\n\n def _reshape_into_groups(self, inputs, input_shape, tensor_input_shape):\n\n group_shape = 
[tensor_input_shape[i] for i in range(len(input_shape))]\n group_shape[self.axis] = input_shape[self.axis] // self.groups\n group_shape.insert(self.axis, self.groups)\n group_shape = tf.stack(group_shape)\n reshaped_inputs = tf.reshape(inputs, group_shape)\n return reshaped_inputs, group_shape\n\n def _apply_normalization(self, reshaped_inputs, input_shape):\n\n group_shape = tf.keras.backend.int_shape(reshaped_inputs)\n group_reduction_axes = list(range(1, len(group_shape)))\n axis = -2 if self.axis == -1 else self.axis - 1\n group_reduction_axes.pop(axis)\n\n mean, variance = tf.nn.moments(\n reshaped_inputs, group_reduction_axes, keepdims=True\n )\n\n gamma, beta = self._get_reshaped_weights(input_shape)\n normalized_inputs = tf.nn.batch_normalization(\n reshaped_inputs,\n mean=mean,\n variance=variance,\n scale=gamma,\n offset=beta,\n variance_epsilon=self.epsilon,\n )\n return normalized_inputs\n\n def _get_reshaped_weights(self, input_shape):\n broadcast_shape = self._create_broadcast_shape(input_shape)\n gamma = None\n beta = None\n if self.scale:\n gamma = tf.reshape(self.gamma, broadcast_shape)\n\n if self.center:\n beta = tf.reshape(self.beta, broadcast_shape)\n return gamma, beta\n\n def _check_if_input_shape_is_none(self, input_shape):\n dim = input_shape[self.axis]\n if dim is None:\n raise ValueError(\n \"Axis \" + str(self.axis) + \" of \"\n \"input tensor should have a defined dimension \"\n \"but the layer received an input with shape \" + str(input_shape) + \".\"\n )\n\n def _set_number_of_groups_for_instance_norm(self, input_shape):\n dim = input_shape[self.axis]\n\n if self.groups == -1:\n self.groups = dim\n\n def _check_size_of_dimensions(self, input_shape):\n\n dim = input_shape[self.axis]\n if dim < self.groups:\n raise ValueError(\n \"Number of groups (\" + str(self.groups) + \") cannot be \"\n \"more than the number of channels (\" + str(dim) + \").\"\n )\n\n if dim % self.groups != 0:\n raise ValueError(\n \"Number of groups (\" + str(self.groups) + \") must be a \"\n \"multiple of the number of channels (\" + str(dim) + \").\"\n )\n\n def _check_axis(self):\n\n if self.axis == 0:\n raise ValueError(\n \"You are trying to normalize your batch axis. 
Do you want to \"\n \"use tf.layer.batch_normalization instead\"\n )\n\n def _create_input_spec(self, input_shape):\n\n dim = input_shape[self.axis]\n self.input_spec = tf.keras.layers.InputSpec(\n ndim=len(input_shape), axes={self.axis: dim}\n )\n\n def _add_gamma_weight(self, input_shape):\n\n dim = input_shape[self.axis]\n shape = (dim,)\n\n if self.scale:\n self.gamma = self.add_weight(\n shape=shape,\n name=\"gamma\",\n initializer=self.gamma_initializer,\n regularizer=self.gamma_regularizer,\n constraint=self.gamma_constraint,\n )\n else:\n self.gamma = None\n\n def _add_beta_weight(self, input_shape):\n\n dim = input_shape[self.axis]\n shape = (dim,)\n\n if self.center:\n self.beta = self.add_weight(\n shape=shape,\n name=\"beta\",\n initializer=self.beta_initializer,\n regularizer=self.beta_regularizer,\n constraint=self.beta_constraint,\n )\n else:\n self.beta = None\n\n def _create_broadcast_shape(self, input_shape):\n broadcast_shape = [1] * len(input_shape)\n broadcast_shape[self.axis] = input_shape[self.axis] // self.groups\n broadcast_shape.insert(self.axis, self.groups)\n return broadcast_shape\n\n \nclass TransformBlock(tf.keras.layers.Layer):\n\n def __init__(self, features,\n norm_type,\n momentum=0.9,\n virtual_batch_size=None,\n groups=2,\n block_name='',\n **kwargs):\n super(TransformBlock, self).__init__(**kwargs)\n\n self.features = features\n self.norm_type = norm_type\n self.momentum = momentum\n self.groups = groups\n self.virtual_batch_size = virtual_batch_size\n self.block_name = block_name\n \n def build(self, input_shape):\n self.transform = tf.keras.layers.Dense(self.features, use_bias=False, name=f'transformblock_dense_{self.block_name}')\n if self.norm_type == 'batch':\n self.bn = tf.keras.layers.BatchNormalization(axis=-1, momentum=momentum,\n virtual_batch_size=virtual_batch_size,\n name=f'transformblock_bn_{self.block_name}')\n else:\n self.bn = GroupNormalization(axis=-1, groups=self.groups, name=f'transformblock_gn_{self.block_name}')\n \n self.built = True\n super().build(input_shape)\n \n def call(self, inputs, training=None):\n x = self.transform(inputs)\n x = self.bn(x, training=training)\n return x\n \n def get_config(self):\n config = {\n \"features\": self.features,\n \"norm_type\": self.norm_type,\n \"virtual_batch_size\": self.virtual_batch_size,\n \"groups\": self.groups,\n \"block_name\": self.block_name\n }\n base_config = super().get_config()\n return {**base_config, **config}\n \n def compute_output_shape(self, input_shape):\n return input_shape\n\n\nclass TabNetEncoderLayer(tf.keras.layers.Layer):\n\n def __init__(self, feature_columns,\n feature_dim=16,\n output_dim=8,\n num_features=None,\n num_decision_steps=3,\n relaxation_factor=1.5,\n sparsity_coefficient=1e-5,\n norm_type='group',\n batch_momentum=0.98,\n virtual_batch_size=1024,\n num_groups=2,\n epsilon=1e-5,\n **kwargs):\n\n super(TabNetEncoderLayer, self).__init__(**kwargs)\n\n # Input checks\n if feature_columns is not None:\n if type(feature_columns) not in (list, tuple):\n raise ValueError(\"`feature_columns` must be a list or a tuple.\")\n\n if len(feature_columns) == 0:\n raise ValueError(\"`feature_columns` must be contain at least 1 tf.feature_column !\")\n\n if num_features is None:\n num_features = len(feature_columns)\n else:\n num_features = int(num_features)\n\n else:\n if num_features is None:\n raise ValueError(\"If `feature_columns` is None, then `num_features` cannot be None.\")\n\n if num_decision_steps < 1:\n raise ValueError(\"Num decision steps must be 
greater than 0.\")\n \n if feature_dim <= output_dim:\n raise ValueError(\"To compute `features_for_coef`, feature_dim must be larger than output dim\")\n\n feature_dim = int(feature_dim)\n output_dim = int(output_dim)\n num_decision_steps = int(num_decision_steps)\n relaxation_factor = float(relaxation_factor)\n sparsity_coefficient = float(sparsity_coefficient)\n batch_momentum = float(batch_momentum)\n num_groups = max(1, int(num_groups))\n epsilon = float(epsilon)\n\n if relaxation_factor < 0.:\n raise ValueError(\"`relaxation_factor` cannot be negative !\")\n\n if sparsity_coefficient < 0.:\n raise ValueError(\"`sparsity_coefficient` cannot be negative !\")\n\n if virtual_batch_size is not None:\n virtual_batch_size = int(virtual_batch_size)\n\n if norm_type not in ['batch', 'group']:\n raise ValueError(\"`norm_type` must be either `batch` or `group`\")\n\n self.feature_columns = feature_columns\n self.num_features = num_features\n self.feature_dim = feature_dim\n self.output_dim = output_dim\n\n self.num_decision_steps = num_decision_steps\n self.relaxation_factor = relaxation_factor\n self.sparsity_coefficient = sparsity_coefficient\n self.norm_type = norm_type\n self.batch_momentum = batch_momentum\n self.virtual_batch_size = virtual_batch_size\n self.num_groups = num_groups\n self.epsilon = epsilon\n\n if num_decision_steps > 1:\n features_for_coeff = feature_dim - output_dim\n print(f\"[TabNet]: {features_for_coeff} features will be used for decision steps.\")\n\n if self.feature_columns is not None:\n self.input_features = tf.keras.layers.DenseFeatures(feature_columns, trainable=True)\n\n if self.norm_type == 'batch':\n self.input_bn = tf.keras.layers.BatchNormalization(axis=-1, momentum=batch_momentum, name='input_bn')\n else:\n self.input_bn = GroupNormalization(axis=-1, groups=self.num_groups, name='input_gn')\n\n else:\n self.input_features = None\n self.input_bn = None\n \n def build(self, input_shape):\n self.transform_f1 = TransformBlock(2 * self.feature_dim, self.norm_type,\n self.batch_momentum, self.virtual_batch_size, self.num_groups,\n block_name='f1')\n\n self.transform_f2 = TransformBlock(2 * self.feature_dim, self.norm_type,\n self.batch_momentum, self.virtual_batch_size, self.num_groups,\n block_name='f2')\n\n self.transform_f3_list = [\n TransformBlock(2 * self.feature_dim, self.norm_type,\n self.batch_momentum, self.virtual_batch_size, self.num_groups, block_name=f'f3_{i}')\n for i in range(self.num_decision_steps)\n ]\n\n self.transform_f4_list = [\n TransformBlock(2 * self.feature_dim, self.norm_type,\n self.batch_momentum, self.virtual_batch_size, self.num_groups, block_name=f'f4_{i}')\n for i in range(self.num_decision_steps)\n ]\n\n self.transform_coef_list = [\n TransformBlock(self.num_features, self.norm_type,\n self.batch_momentum, self.virtual_batch_size, self.num_groups, block_name=f'coef_{i}')\n for i in range(self.num_decision_steps - 1)\n ]\n\n self._step_feature_selection_masks = None\n self._step_aggregate_feature_selection_mask = None\n self.built = True\n super(TabNetEncoderLayer, self).build(input_shape)\n\n def call(self, inputs, training=None):\n if self.input_features is not None:\n features = self.input_features(inputs)\n features = self.input_bn(features, training=training)\n\n else:\n features = inputs\n\n batch_size = tf.shape(features)[0]\n self._step_feature_selection_masks = []\n self._step_aggregate_feature_selection_mask = None\n\n # Initializes decision-step dependent variables.\n output_aggregated = tf.zeros([batch_size, 
self.output_dim])\n masked_features = features\n mask_values = tf.zeros([batch_size, self.num_features])\n aggregated_mask_values = tf.zeros([batch_size, self.num_features])\n complementary_aggregated_mask_values = tf.ones(\n [batch_size, self.num_features])\n\n total_entropy = 0.0\n entropy_loss = 0.\n\n for ni in range(self.num_decision_steps):\n # Feature transformer with two shared and two decision step dependent\n # blocks is used below.=\n transform_f1 = self.transform_f1(masked_features, training=training)\n transform_f1 = glu(transform_f1, self.feature_dim)\n\n transform_f2 = self.transform_f2(transform_f1, training=training)\n transform_f2 = (glu(transform_f2, self.feature_dim) +\n transform_f1) * tf.math.sqrt(0.5)\n\n transform_f3 = self.transform_f3_list[ni](transform_f2, training=training)\n transform_f3 = (glu(transform_f3, self.feature_dim) +\n transform_f2) * tf.math.sqrt(0.5)\n\n transform_f4 = self.transform_f4_list[ni](transform_f3, training=training)\n transform_f4 = (glu(transform_f4, self.feature_dim) +\n transform_f3) * tf.math.sqrt(0.5)\n\n if (ni > 0 or self.num_decision_steps == 1):\n decision_out = tf.nn.relu(transform_f4[:, :self.output_dim])\n\n # Decision aggregation.\n output_aggregated += decision_out\n\n # Aggregated masks are used for visualization of the\n # feature importance attributes.\n scale_agg = tf.reduce_sum(decision_out, axis=1, keepdims=True)\n\n if self.num_decision_steps > 1:\n scale_agg = scale_agg / tf.cast(self.num_decision_steps - 1, tf.float32)\n\n aggregated_mask_values += mask_values * scale_agg\n\n features_for_coef = transform_f4[:, self.output_dim:]\n\n if ni < (self.num_decision_steps - 1):\n # Determines the feature masks via linear and nonlinear\n # transformations, taking into account of aggregated feature use.\n mask_values = self.transform_coef_list[ni](features_for_coef, training=training)\n mask_values *= complementary_aggregated_mask_values\n mask_values = sparsemax(mask_values, axis=-1)\n\n # Relaxation factor controls the amount of reuse of features between\n # different decision blocks and updated with the values of\n # coefficients.\n complementary_aggregated_mask_values *= (\n self.relaxation_factor - mask_values)\n\n # Entropy is used to penalize the amount of sparsity in feature\n # selection.\n total_entropy += tf.reduce_mean(\n tf.reduce_sum(\n -mask_values * tf.math.log(mask_values + self.epsilon), axis=1)) / (\n tf.cast(self.num_decision_steps - 1, tf.float32))\n\n # Add entropy loss\n entropy_loss = total_entropy\n\n # Feature selection.\n masked_features = tf.multiply(mask_values, features)\n\n # Visualization of the feature selection mask at decision step ni\n # tf.summary.image(\n # \"Mask for step\" + str(ni),\n # tf.expand_dims(tf.expand_dims(mask_values, 0), 3),\n # max_outputs=1)\n mask_at_step_i = tf.expand_dims(tf.expand_dims(mask_values, 0), 3)\n self._step_feature_selection_masks.append(mask_at_step_i)\n\n else:\n # This branch is needed for correct compilation by tf.autograph\n entropy_loss = 0.\n\n # Adds the loss automatically\n self.add_loss(self.sparsity_coefficient * entropy_loss)\n\n # Visualization of the aggregated feature importances\n # tf.summary.image(\n # \"Aggregated mask\",\n # tf.expand_dims(tf.expand_dims(aggregated_mask_values, 0), 3),\n # max_outputs=1)\n\n agg_mask = tf.expand_dims(tf.expand_dims(aggregated_mask_values, 0), 3)\n self._step_aggregate_feature_selection_mask = agg_mask\n return output_aggregated\n\n def feature_selection_masks(self):\n return 
self._step_feature_selection_masks\n\n def aggregate_feature_selection_mask(self):\n return self._step_aggregate_feature_selection_mask\n \n def compute_output_shape(self, input_shape):\n return self.output_dim\n \n def get_config(self):\n config = {\n \"feature_columns\": self.feature_columns,\n \"num_features\": self.num_features,\n \"feature_dim\": self.feature_dim,\n \"output_dim\": self.output_dim,\n \"num_decision_steps\": self.num_decision_steps,\n \"relaxation_factor\": self.relaxation_factor,\n \"sparsity_coefficient\": self.sparsity_coefficient,\n \"norm_type\": self.norm_type,\n \"batch_momentum\": self.batch_momentum,\n \"virtual_batch_size\": self.virtual_batch_size,\n \"num_groups\": self.num_groups,\n \"epsilon\": self.epsilon,\n }\n base_config = super().get_config()\n return {**base_config, **config}\n \n \n# 必须也将 UserLayer 赋值给 bigquant_run\nbigquant_run = TabNetEncoderLayer\n","type":"Literal","bound_global_parameter":null},{"name":"params","value":"{\n \"num_features\": 98, \n \"feature_columns\": None,\n \"feature_dim\": 64,\n \"output_dim\": 32,\n \"num_decision_steps\": 3,\n \"relaxation_factor\": 1.3,\n \"sparsity_coefficient\": 1e-5,\n \"norm_type\": \"group\",\n \"batch_momentum\": 0.9,\n \"virtual_batch_size\": 128,\n \"num_groups\": 2,\n \"epsilon\": 1e-5\n}","type":"Literal","bound_global_parameter":null},{"name":"name","value":"","type":"Literal","bound_global_parameter":null}],"input_ports":[{"name":"input1","node_id":"-18019"},{"name":"input2","node_id":"-18019"},{"name":"input3","node_id":"-18019"}],"output_ports":[{"name":"data","node_id":"-18019"}],"cacheable":false,"seq_num":12,"comment":"Tannet Encoder","comment_collapsed":false}],"node_layout":"<node_postions><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-8' Position='322,62,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-15' Position='114,177,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-24' Position='765,-27,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-53' Position='291,505,200,200'/><node_position Node='287d2cb0-f53c-4101-bdf8-104b137c8601-62' Position='1164,77,200,200'/><node_position Node='-106' Position='441,170,200,200'/><node_position Node='-113' Position='442,234,200,200'/><node_position Node='-122' Position='1167,171,200,200'/><node_position Node='-129' Position='1166,246,200,200'/><node_position Node='-141' Position='193,1195,200,200'/><node_position Node='-160' Position='-497,324,200,200'/><node_position Node='-238' Position='-253,587,200,200'/><node_position Node='-682' Position='-394,754,200,200'/><node_position Node='-1098' Position='60,840,200,200'/><node_position Node='-1540' Position='268,954,200,200'/><node_position Node='-2431' Position='281,1077,200,200'/><node_position Node='-243' Position='288,590,200,200'/><node_position Node='-251' Position='1149,489,200,200'/><node_position Node='-436' Position='287,683,200,200'/><node_position Node='-266' Position='448,313,200,200'/><node_position Node='-288' Position='445,381,200,200'/><node_position Node='-293' Position='1160,394,200,200'/><node_position Node='-298' Position='1166,312,200,200'/><node_position Node='-276' Position='117,249,200,200'/><node_position Node='-18019' Position='-265,451,200,200'/></node_postions>"},"nodes_readonly":false,"studio_version":"v2"}
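The auto-labeler node in the graph above defines the prediction target: the close 5 days ahead (sell price) divided by the next day's open (buy price), minus 1, clipped at the 1%/99% quantiles, with one-tick limit-up days (next high equals next low) set to NaN. A rough pandas equivalent, assuming per-instrument daily bars with `close`, `open`, `high`, `low` columns, might look like this:

    import pandas as pd

    def make_label(df: pd.DataFrame) -> pd.Series:
        """Sketch of the label expression: shift(close, -5) / shift(open, -1) - 1,
        clipped at the 1%/99% quantiles, NaN on one-tick limit-up days."""
        g = df.groupby('instrument')
        label = g['close'].shift(-5) / g['open'].shift(-1) - 1
        label = label.clip(label.quantile(0.01), label.quantile(0.99))
        # Mark days where the next bar's high equals its low (untradable limit board) as NaN
        return label.mask(g['high'].shift(-1) == g['low'].shift(-1))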
    In [3]:
    # This code was auto-generated by the visual strategy environment on 2022-08-10 00:34
    # This code cell can only be edited in visual mode. You can also copy the code, paste it into a new code cell or strategy, and then modify it.
    
    
    # Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
    def m10_run_bigquant_run(input_1, input_2, input_3):
        # Example code below. Write your code here
        from sklearn.model_selection import train_test_split
        data = input_1.read()
        x_train, x_val, y_train, y_val = train_test_split(data["x"], data['y'], test_size=0.2)
        data_1 = DataSource.write_pickle({'x': x_train, 'y': y_train})
        data_2 = DataSource.write_pickle({'x': x_val, 'y': y_val})
        return Outputs(data_1=data_1, data_2=data_2, data_3=None)
    
    # Post-processing function (optional). Its input is the main function's output; you can further process the data here or return a friendlier outputs format. The output of this function is not cached.
    def m10_post_run_bigquant_run(outputs):
        return outputs
    
    import tensorflow as tf
    from tensorflow.keras.layers import Layer
    
    def glu(x, n_units=None):
        """Generalized linear unit nonlinear activation."""
        if n_units is None:
            n_units = tf.shape(x)[-1] // 2
    
        return x[..., :n_units] * tf.nn.sigmoid(x[..., n_units:])
    
    
    def sparsemax(logits, axis):
        logits = tf.convert_to_tensor(logits, name="logits")
    
        # We need its original shape for shape inference.
        shape = logits.get_shape()
        rank = shape.rank
        is_last_axis = (axis == -1) or (axis == rank - 1)
    
        if is_last_axis:
            output = _compute_2d_sparsemax(logits)
            output.set_shape(shape)
            return output
    
        # Swap logits' dimension of dim and its last dimension.
        rank_op = tf.rank(logits)
        axis_norm = axis % rank
        logits = _swap_axis(logits, axis_norm, tf.math.subtract(rank_op, 1))
    
        # Do the actual softmax on its last dimension.
        output = _compute_2d_sparsemax(logits)
        output = _swap_axis(output, axis_norm, tf.math.subtract(rank_op, 1))
    
        # Make shape inference work since transpose may erase its static shape.
        output.set_shape(shape)
        return output
    
    
    def _swap_axis(logits, dim_index, last_index, **kwargs):
        return tf.transpose(
            logits,
            tf.concat(
                [
                    tf.range(dim_index),
                    [last_index],
                    tf.range(dim_index + 1, last_index),
                    [dim_index],
                ],
                0,
            ),
            **kwargs,
        )
    
    
    def _compute_2d_sparsemax(logits):
        """Performs the sparsemax operation when axis=-1."""
        shape_op = tf.shape(logits)
        obs = tf.math.reduce_prod(shape_op[:-1])
        dims = shape_op[-1]
    
        # In the paper, they call the logits z.
        # The mean(logits) could be subtracted from logits to make the algorithm
        # more numerically stable. The instability in this algorithm comes mostly
        # from z_cumsum. Subtracting the mean would cause z_cumsum to be close
        # to zero. However, in practice the numerical instability issues are very
        # minor and subtracting the mean causes extra issues with inf and nan
        # input.
        # Reshape to [obs, dims] as it is almost free and means the remaining
        # code doesn't need to worry about the rank.
        z = tf.reshape(logits, [obs, dims])
    
        # sort z
        z_sorted, _ = tf.nn.top_k(z, k=dims)
    
        # calculate k(z)
        z_cumsum = tf.math.cumsum(z_sorted, axis=-1)
        k = tf.range(1, tf.cast(dims, logits.dtype) + 1) #, dtype=logits.dtype)
        z_check = 1 + k * z_sorted > z_cumsum
        # Because the z_check vector is always [1,1,...,1,0,0,...,0], finding the
        # (index + 1) of the last `1` is the same as counting the number of 1s.
        k_z = tf.math.reduce_sum(tf.cast(z_check, tf.int32), axis=-1)
    
        # calculate tau(z)
        # If there are inf values or all values are -inf, the k_z will be zero,
        # this is mathematically invalid and will also cause the gather_nd to fail.
        # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then
        # fixed later (see p_safe) by returning p = nan. This results in the same
        # behavior as softmax.
        k_z_safe = tf.math.maximum(k_z, 1)
        indices = tf.stack([tf.range(0, obs), tf.reshape(k_z_safe, [-1]) - 1], axis=1)
        tau_sum = tf.gather_nd(z_cumsum, indices)
        tau_z = (tau_sum - 1) / tf.cast(k_z, logits.dtype)
    
        # calculate p
        p = tf.math.maximum(tf.cast(0, logits.dtype), z - tf.expand_dims(tau_z, -1))
        # If k_z = 0 or if z = nan, then the input is invalid
        p_safe = tf.where(
            tf.expand_dims(
                tf.math.logical_or(tf.math.equal(k_z, 0), tf.math.is_nan(z_cumsum[:, -1])),
                axis=-1,
            ),
            tf.fill([obs, dims], tf.cast(float("nan"), logits.dtype)),
            p,
        )
    
        # Reshape back to original size
        p_safe = tf.reshape(p_safe, shape_op)
        return p_safe
    
    
    """
    Code replicated from https://github.com/tensorflow/addons/blob/master/tensorflow_addons/layers/normalizations.py
    """
    class GroupNormalization(tf.keras.layers.Layer):
        def __init__(
                self,
                groups: int = 2,
                axis: int = -1,
                epsilon: float = 1e-3,
                center: bool = True,
                scale: bool = True,
                beta_initializer="zeros",
                gamma_initializer="ones",
                beta_regularizer=None,
                gamma_regularizer=None,
                beta_constraint=None,
                gamma_constraint=None,
                **kwargs
        ):
            super().__init__(**kwargs)
            self.supports_masking = True
            self.groups = groups
            self.axis = axis
            self.epsilon = epsilon
            self.center = center
            self.scale = scale
            self.beta_initializer = tf.keras.initializers.get(beta_initializer)
            self.gamma_initializer = tf.keras.initializers.get(gamma_initializer)
            self.beta_regularizer = tf.keras.regularizers.get(beta_regularizer)
            self.gamma_regularizer = tf.keras.regularizers.get(gamma_regularizer)
            self.beta_constraint = tf.keras.constraints.get(beta_constraint)
            self.gamma_constraint = tf.keras.constraints.get(gamma_constraint)
            self._check_axis()
    
        def build(self, input_shape):
    
            self._check_if_input_shape_is_none(input_shape)
            self._set_number_of_groups_for_instance_norm(input_shape)
            self._check_size_of_dimensions(input_shape)
            self._create_input_spec(input_shape)
    
            self._add_gamma_weight(input_shape)
            self._add_beta_weight(input_shape)
            self.built = True
            super().build(input_shape)
    
        def call(self, inputs, training=None):
            # Training=none is just for compat with batchnorm signature call
            input_shape = tf.keras.backend.int_shape(inputs)
            tensor_input_shape = tf.shape(inputs)
    
            reshaped_inputs, group_shape = self._reshape_into_groups(
                inputs, input_shape, tensor_input_shape
            )
    
            normalized_inputs = self._apply_normalization(reshaped_inputs, input_shape)
    
            outputs = tf.reshape(normalized_inputs, tensor_input_shape)
    
            return outputs
    
        def get_config(self):
            config = {
                "groups": self.groups,
                "axis": self.axis,
                "epsilon": self.epsilon,
                "center": self.center,
                "scale": self.scale,
                "beta_initializer": tf.keras.initializers.serialize(self.beta_initializer),
                "gamma_initializer": tf.keras.initializers.serialize(
                    self.gamma_initializer
                ),
                "beta_regularizer": tf.keras.regularizers.serialize(self.beta_regularizer),
                "gamma_regularizer": tf.keras.regularizers.serialize(
                    self.gamma_regularizer
                ),
                "beta_constraint": tf.keras.constraints.serialize(self.beta_constraint),
                "gamma_constraint": tf.keras.constraints.serialize(self.gamma_constraint),
            }
            base_config = super().get_config()
            return {**base_config, **config}
    
        def compute_output_shape(self, input_shape):
            return input_shape
    
        def _reshape_into_groups(self, inputs, input_shape, tensor_input_shape):
    
            group_shape = [tensor_input_shape[i] for i in range(len(input_shape))]
            group_shape[self.axis] = input_shape[self.axis] // self.groups
            group_shape.insert(self.axis, self.groups)
            group_shape = tf.stack(group_shape)
            reshaped_inputs = tf.reshape(inputs, group_shape)
            return reshaped_inputs, group_shape
    
        def _apply_normalization(self, reshaped_inputs, input_shape):
    
            group_shape = tf.keras.backend.int_shape(reshaped_inputs)
            group_reduction_axes = list(range(1, len(group_shape)))
            axis = -2 if self.axis == -1 else self.axis - 1
            group_reduction_axes.pop(axis)
    
            mean, variance = tf.nn.moments(
                reshaped_inputs, group_reduction_axes, keepdims=True
            )
    
            gamma, beta = self._get_reshaped_weights(input_shape)
            normalized_inputs = tf.nn.batch_normalization(
                reshaped_inputs,
                mean=mean,
                variance=variance,
                scale=gamma,
                offset=beta,
                variance_epsilon=self.epsilon,
            )
            return normalized_inputs
    
        def _get_reshaped_weights(self, input_shape):
            broadcast_shape = self._create_broadcast_shape(input_shape)
            gamma = None
            beta = None
            if self.scale:
                gamma = tf.reshape(self.gamma, broadcast_shape)
    
            if self.center:
                beta = tf.reshape(self.beta, broadcast_shape)
            return gamma, beta
    
        def _check_if_input_shape_is_none(self, input_shape):
            dim = input_shape[self.axis]
            if dim is None:
                raise ValueError(
                    "Axis " + str(self.axis) + " of "
                                               "input tensor should have a defined dimension "
                                               "but the layer received an input with shape " + str(input_shape) + "."
                )
    
        def _set_number_of_groups_for_instance_norm(self, input_shape):
            dim = input_shape[self.axis]
    
            if self.groups == -1:
                self.groups = dim
    
        def _check_size_of_dimensions(self, input_shape):
    
            dim = input_shape[self.axis]
            if dim < self.groups:
                raise ValueError(
                    "Number of groups (" + str(self.groups) + ") cannot be "
                                                              "more than the number of channels (" + str(dim) + ")."
                )
    
            if dim % self.groups != 0:
                raise ValueError(
                    "Number of groups (" + str(self.groups) + ") must be a "
                                                              "multiple of the number of channels (" + str(dim) + ")."
                )
    
        def _check_axis(self):
    
            if self.axis == 0:
                raise ValueError(
                    "You are trying to normalize your batch axis. Do you want to "
                    "use tf.layer.batch_normalization instead"
                )
    
        def _create_input_spec(self, input_shape):
    
            dim = input_shape[self.axis]
            self.input_spec = tf.keras.layers.InputSpec(
                ndim=len(input_shape), axes={self.axis: dim}
            )
    
        def _add_gamma_weight(self, input_shape):
    
            dim = input_shape[self.axis]
            shape = (dim,)
    
            if self.scale:
                self.gamma = self.add_weight(
                    shape=shape,
                    name="gamma",
                    initializer=self.gamma_initializer,
                    regularizer=self.gamma_regularizer,
                    constraint=self.gamma_constraint,
                )
            else:
                self.gamma = None
    
        def _add_beta_weight(self, input_shape):
    
            dim = input_shape[self.axis]
            shape = (dim,)
    
            if self.center:
                self.beta = self.add_weight(
                    shape=shape,
                    name="beta",
                    initializer=self.beta_initializer,
                    regularizer=self.beta_regularizer,
                    constraint=self.beta_constraint,
                )
            else:
                self.beta = None
    
        def _create_broadcast_shape(self, input_shape):
            broadcast_shape = [1] * len(input_shape)
            broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
            broadcast_shape.insert(self.axis, self.groups)
            return broadcast_shape
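    
    # A quick shape check of the layer above: 98 features normalized in 2 groups, matching the
    # num_groups=2 used by the TabNet encoder later in this cell (the sample values are illustrative).
    _gn_demo = GroupNormalization(groups=2, axis=-1)(tf.random.normal([4, 98]))  # shape: (4, 98)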
    
        
    class TransformBlock(tf.keras.layers.Layer):
    
        def __init__(self, features,
                     norm_type,
                     momentum=0.9,
                     virtual_batch_size=None,
                     groups=2,
                     block_name='',
                     **kwargs):
            super(TransformBlock, self).__init__(**kwargs)
    
            self.features = features
            self.norm_type = norm_type
            self.momentum = momentum
            self.groups = groups
            self.virtual_batch_size = virtual_batch_size
            self.block_name = block_name
        
        def build(self, input_shape):
            self.transform = tf.keras.layers.Dense(self.features, use_bias=False, name=f'transformblock_dense_{self.block_name}')
            if self.norm_type == 'batch':
                self.bn = tf.keras.layers.BatchNormalization(axis=-1, momentum=self.momentum,
                                                             virtual_batch_size=self.virtual_batch_size,
                                                             name=f'transformblock_bn_{self.block_name}')
            else:
                self.bn = GroupNormalization(axis=-1, groups=self.groups, name=f'transformblock_gn_{self.block_name}')
                
            self.built = True
            super().build(input_shape)
            
        def call(self, inputs, training=None):
            x = self.transform(inputs)
            x = self.bn(x, training=training)
            return x
        
        def get_config(self):
            config = {
                "features": self.features,
                "norm_type": self.norm_type,
                "virtual_batch_size": self.virtual_batch_size,
                "groups": self.groups,
                "block_name": self.block_name
            }
            base_config = super().get_config()
            return {**base_config, **config}
        
        def compute_output_shape(self, input_shape):
            return input_shape
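    
    # A minimal sketch of what a single TransformBlock does: a bias-free Dense projection
    # followed by group (or batch) normalization; the 98 -> 32 shapes below are illustrative only.
    _tb_demo = TransformBlock(features=32, norm_type='group', groups=2, block_name='demo')
    _tb_demo_out = _tb_demo(tf.random.normal([4, 98]), training=False)  # shape: (4, 32)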
    
    
    class TabNetEncoderLayer(tf.keras.layers.Layer):
    
        def __init__(self, feature_columns,
                     feature_dim=16,
                     output_dim=8,
                     num_features=None,
                     num_decision_steps=3,
                     relaxation_factor=1.5,
                     sparsity_coefficient=1e-5,
                     norm_type='group',
                     batch_momentum=0.98,
                     virtual_batch_size=1024,
                     num_groups=2,
                     epsilon=1e-5,
                     **kwargs):
    
            super(TabNetEncoderLayer, self).__init__(**kwargs)
    
            # Input checks
            if feature_columns is not None:
                if type(feature_columns) not in (list, tuple):
                    raise ValueError("`feature_columns` must be a list or a tuple.")
    
                if len(feature_columns) == 0:
                    raise ValueError("`feature_columns` must be contain at least 1 tf.feature_column !")
    
                if num_features is None:
                    num_features = len(feature_columns)
                else:
                    num_features = int(num_features)
    
            else:
                if num_features is None:
                    raise ValueError("If `feature_columns` is None, then `num_features` cannot be None.")
    
            if num_decision_steps < 1:
                raise ValueError("Num decision steps must be greater than 0.")
            
            if feature_dim <= output_dim:
                raise ValueError("To compute `features_for_coef`, feature_dim must be larger than output dim")
    
            feature_dim = int(feature_dim)
            output_dim = int(output_dim)
            num_decision_steps = int(num_decision_steps)
            relaxation_factor = float(relaxation_factor)
            sparsity_coefficient = float(sparsity_coefficient)
            batch_momentum = float(batch_momentum)
            num_groups = max(1, int(num_groups))
            epsilon = float(epsilon)
    
            if relaxation_factor < 0.:
                raise ValueError("`relaxation_factor` cannot be negative !")
    
            if sparsity_coefficient < 0.:
                raise ValueError("`sparsity_coefficient` cannot be negative !")
    
            if virtual_batch_size is not None:
                virtual_batch_size = int(virtual_batch_size)
    
            if norm_type not in ['batch', 'group']:
                raise ValueError("`norm_type` must be either `batch` or `group`")
    
            self.feature_columns = feature_columns
            self.num_features = num_features
            self.feature_dim = feature_dim
            self.output_dim = output_dim
    
            self.num_decision_steps = num_decision_steps
            self.relaxation_factor = relaxation_factor
            self.sparsity_coefficient = sparsity_coefficient
            self.norm_type = norm_type
            self.batch_momentum = batch_momentum
            self.virtual_batch_size = virtual_batch_size
            self.num_groups = num_groups
            self.epsilon = epsilon
    
            if num_decision_steps > 1:
                features_for_coeff = feature_dim - output_dim
                print(f"[TabNet]: {features_for_coeff} features will be used for decision steps.")
    
            if self.feature_columns is not None:
                self.input_features = tf.keras.layers.DenseFeatures(feature_columns, trainable=True)
    
                if self.norm_type == 'batch':
                    self.input_bn = tf.keras.layers.BatchNormalization(axis=-1, momentum=batch_momentum, name='input_bn')
                else:
                    self.input_bn = GroupNormalization(axis=-1, groups=self.num_groups, name='input_gn')
    
            else:
                self.input_features = None
                self.input_bn = None
        
        def build(self, input_shape):
            self.transform_f1 = TransformBlock(2 * self.feature_dim, self.norm_type,
                                               self.batch_momentum, self.virtual_batch_size, self.num_groups,
                                               block_name='f1')
    
            self.transform_f2 = TransformBlock(2 * self.feature_dim, self.norm_type,
                                               self.batch_momentum, self.virtual_batch_size, self.num_groups,
                                               block_name='f2')
    
            self.transform_f3_list = [
                TransformBlock(2 * self.feature_dim, self.norm_type,
                               self.batch_momentum, self.virtual_batch_size, self.num_groups, block_name=f'f3_{i}')
                for i in range(self.num_decision_steps)
            ]
    
            self.transform_f4_list = [
                TransformBlock(2 * self.feature_dim, self.norm_type,
                               self.batch_momentum, self.virtual_batch_size, self.num_groups, block_name=f'f4_{i}')
                for i in range(self.num_decision_steps)
            ]
    
            self.transform_coef_list = [
                TransformBlock(self.num_features, self.norm_type,
                               self.batch_momentum, self.virtual_batch_size, self.num_groups, block_name=f'coef_{i}')
                for i in range(self.num_decision_steps - 1)
            ]
    
            self._step_feature_selection_masks = None
            self._step_aggregate_feature_selection_mask = None
            self.built = True
            super(TabNetEncoderLayer, self).build(input_shape)
    
        def call(self, inputs, training=None):
            if self.input_features is not None:
                features = self.input_features(inputs)
                features = self.input_bn(features, training=training)
    
            else:
                features = inputs
    
            batch_size = tf.shape(features)[0]
            self._step_feature_selection_masks = []
            self._step_aggregate_feature_selection_mask = None
    
            # Initializes decision-step dependent variables.
            output_aggregated = tf.zeros([batch_size, self.output_dim])
            masked_features = features
            mask_values = tf.zeros([batch_size, self.num_features])
            aggregated_mask_values = tf.zeros([batch_size, self.num_features])
            complementary_aggregated_mask_values = tf.ones(
                [batch_size, self.num_features])
    
            total_entropy = 0.0
            entropy_loss = 0.
    
            for ni in range(self.num_decision_steps):
                # Feature transformer with two shared and two decision-step-dependent
                # blocks is used below.
                transform_f1 = self.transform_f1(masked_features, training=training)
                transform_f1 = glu(transform_f1, self.feature_dim)
    
                transform_f2 = self.transform_f2(transform_f1, training=training)
                transform_f2 = (glu(transform_f2, self.feature_dim) +
                                transform_f1) * tf.math.sqrt(0.5)
    
                transform_f3 = self.transform_f3_list[ni](transform_f2, training=training)
                transform_f3 = (glu(transform_f3, self.feature_dim) +
                                transform_f2) * tf.math.sqrt(0.5)
    
                transform_f4 = self.transform_f4_list[ni](transform_f3, training=training)
                transform_f4 = (glu(transform_f4, self.feature_dim) +
                                transform_f3) * tf.math.sqrt(0.5)
    
                if (ni > 0 or self.num_decision_steps == 1):
                    decision_out = tf.nn.relu(transform_f4[:, :self.output_dim])
    
                    # Decision aggregation.
                    output_aggregated += decision_out
    
                    # Aggregated masks are used for visualization of the
                    # feature importance attributes.
                    scale_agg = tf.reduce_sum(decision_out, axis=1, keepdims=True)
    
                    if self.num_decision_steps > 1:
                        scale_agg = scale_agg / tf.cast(self.num_decision_steps - 1, tf.float32)
    
                    aggregated_mask_values += mask_values * scale_agg
    
                features_for_coef = transform_f4[:, self.output_dim:]
    
                if ni < (self.num_decision_steps - 1):
                    # Determines the feature masks via linear and nonlinear
                    # transformations, taking into account the aggregated feature usage.
                    mask_values = self.transform_coef_list[ni](features_for_coef, training=training)
                    mask_values *= complementary_aggregated_mask_values
                    mask_values = sparsemax(mask_values, axis=-1)
    
                    # Relaxation factor controls the amount of reuse of features between
                    # different decision blocks and updated with the values of
                    # coefficients.
                    complementary_aggregated_mask_values *= (
                            self.relaxation_factor - mask_values)
    
                    # Entropy is used to penalize the amount of sparsity in feature
                    # selection.
                    total_entropy += tf.reduce_mean(
                        tf.reduce_sum(
                            -mask_values * tf.math.log(mask_values + self.epsilon), axis=1)) / (
                                         tf.cast(self.num_decision_steps - 1, tf.float32))
    
                    # Add entropy loss
                    entropy_loss = total_entropy
    
                    # Feature selection.
                    masked_features = tf.multiply(mask_values, features)
    
                    # Visualization of the feature selection mask at decision step ni
                    # tf.summary.image(
                    #     "Mask for step" + str(ni),
                    #     tf.expand_dims(tf.expand_dims(mask_values, 0), 3),
                    #     max_outputs=1)
                    mask_at_step_i = tf.expand_dims(tf.expand_dims(mask_values, 0), 3)
                    self._step_feature_selection_masks.append(mask_at_step_i)
    
                else:
                    # This branch is needed for correct compilation by tf.autograph
                    entropy_loss = 0.
    
            # Adds the loss automatically
            self.add_loss(self.sparsity_coefficient * entropy_loss)
    
            # Visualization of the aggregated feature importances
            # tf.summary.image(
            #     "Aggregated mask",
            #     tf.expand_dims(tf.expand_dims(aggregated_mask_values, 0), 3),
            #     max_outputs=1)
    
            agg_mask = tf.expand_dims(tf.expand_dims(aggregated_mask_values, 0), 3)
            self._step_aggregate_feature_selection_mask = agg_mask
            return output_aggregated
    
        def feature_selection_masks(self):
            return self._step_feature_selection_masks
    
        def aggregate_feature_selection_mask(self):
            return self._step_aggregate_feature_selection_mask
        
        def compute_output_shape(self, input_shape):
            return self.output_dim
        
        def get_config(self):
            config = {
                "feature_columns": self.feature_columns,
                "num_features": self.num_features,
                "feature_dim": self.feature_dim,
                "output_dim": self.output_dim,
                "num_decision_steps": self.num_decision_steps,
                "relaxation_factor": self.relaxation_factor,
                "sparsity_coefficient": self.sparsity_coefficient,
                "norm_type": self.norm_type,
                "batch_momentum": self.batch_momentum,
                "virtual_batch_size": self.virtual_batch_size,
                "num_groups": self.num_groups,
                "epsilon": self.epsilon,
            }
            base_config = super().get_config()
            return {**base_config, **config}
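    
    # For reference only (not executed as part of this strategy), a minimal sketch of wiring the
    # encoder into a standalone Keras model, mirroring the m6 -> m12 -> m23 graph built further below:
    def _tabnet_demo_model(num_features=98):
        demo_inputs = tf.keras.Input(shape=(num_features,))
        demo_encoded = TabNetEncoderLayer(feature_columns=None, num_features=num_features,
                                          feature_dim=64, output_dim=32,
                                          num_decision_steps=3, relaxation_factor=1.3,
                                          norm_type='group', num_groups=2)(demo_inputs)
        demo_outputs = tf.keras.layers.Dense(1, activation='linear', use_bias=False)(demo_encoded)
        return tf.keras.Model(demo_inputs, demo_outputs)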
        
        
    # The custom layer (UserLayer) must also be assigned to m12_layer_class_bigquant_run
    m12_layer_class_bigquant_run = TabNetEncoderLayer
    
    from tensorflow.keras.optimizers import Adam, schedules
    
    lr = schedules.ExponentialDecay(0.02, decay_steps=2000, decay_rate=0.9, staircase=False)
    
    m5_user_optimizer_bigquant_run = Adam(lr)
    from tensorflow.keras.callbacks import EarlyStopping
    
    m5_earlystop_bigquant_run = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5)
    # User-defined custom layers must be registered in a dict, e.g.
    # {
    #   "MyLayer": MyLayer
    # }
    m5_custom_objects_bigquant_run = {
        "GroupNormalization": GroupNormalization,
        "TransformBlock": TransformBlock,
        "TabNetEncoderLayer": TabNetEncoderLayer
    }
    
    # Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
    def m24_run_bigquant_run(input_1, input_2, input_3):
        # Example code below; write your own logic here
        pred_label = input_1.read_pickle()
        df = input_2.read_df()
        df = pd.DataFrame({'pred_label':pred_label[:,0], 'instrument':df.instrument, 'date':df.date})
        df.sort_values(['date','pred_label'],inplace=True, ascending=[True,False])
        return Outputs(data_1=DataSource.write_df(df), data_2=None, data_3=None)
    
    # Optional post-processing function. It receives the main function's outputs; you can transform the data
    # here or return a friendlier outputs format. The output of this function is not cached.
    def m24_post_run_bigquant_run(outputs):
        return outputs
    
    # Backtest engine: initialization function, executed only once
    def m19_initialize_bigquant_run(context):
        # Load the prediction data (passed in via options) into memory as a DataFrame
        context.ranker_prediction = context.options['data'].read_df()
    
        # The system already sets default commissions and slippage; override the commission as follows
        context.set_commission(PerOrder(buy_cost=0.001, sell_cost=0.001, min_cost=5))
        # Number of stocks to buy: the top 20 in the predicted ranking
        stock_count = 20
        # Per-stock weights: this allocation gives higher-ranked stocks a somewhat larger share of the cash
        # (a short numeric sketch follows this function)
        context.stock_weights = T.norm([1 / math.log(i + 2) for i in range(0, stock_count)])
        # Maximum fraction of the portfolio that any single stock may occupy
        context.max_cash_per_instrument = 0.2
        context.options['hold_days'] = 5
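    
    # A minimal sketch of the ranking weights above, assuming T.norm (a platform helper) simply
    # rescales the list so that it sums to 1; the underscore names below are illustrative only.
    import math  # math is also used inside m19_initialize_bigquant_run above
    _raw_weights = [1 / math.log(i + 2) for i in range(20)]
    _norm_weights = [w / sum(_raw_weights) for w in _raw_weights]  # higher-ranked stocks get a larger share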
    
    # Backtest engine: daily data handler, executed once per trading day
    def m19_handle_data_bigquant_run(context, data):
        # Filter by date to get today's predictions
        ranker_prediction = context.ranker_prediction[
            context.ranker_prediction.date == data.current_dt.strftime('%Y-%m-%d')]
    
        # 1. Cash allocation
        # The average holding period is hold_days and we buy every day, so roughly 1/hold_days of the capital is used per day.
        # In practice there is some buying slippage, so during the first hold_days days an equal share is used;
        # afterwards we try to use the remaining cash, capped at 1.5x the equal share.
        # (A numeric illustration follows this function.)
        is_staging = context.trading_day_index < context.options['hold_days']  # still building positions (first hold_days days)
        cash_avg = context.portfolio.portfolio_value / context.options['hold_days']
        cash_for_buy = min(context.portfolio.cash, (1 if is_staging else 1.5) * cash_avg)
        cash_for_sell = cash_avg - (context.portfolio.cash - cash_for_buy)
        positions = {e.symbol: p.amount * p.last_sale_price
                     for e, p in context.perf_tracker.position_tracker.positions.items()}
    
        # 2. Generate sell orders: selling starts only after the first hold_days days; held stocks ranked
        # lowest by the model's prediction are eliminated first
        if not is_staging and cash_for_sell > 0:
            equities = {e.symbol: e for e, p in context.perf_tracker.position_tracker.positions.items()}
            instruments = list(reversed(list(ranker_prediction.instrument[ranker_prediction.instrument.apply(
                    lambda x: x in equities and not context.has_unfinished_sell_order(equities[x]))])))
            # print('rank order for sell %s' % instruments)
            for instrument in instruments:
                context.order_target(context.symbol(instrument), 0)
                cash_for_sell -= positions[instrument]
                if cash_for_sell <= 0:
                    break
    
        # 3. Generate buy orders: buy the top stock_count stocks in the model's predicted ranking
        buy_cash_weights = context.stock_weights
        buy_instruments = list(ranker_prediction.instrument[:len(buy_cash_weights)])
        max_cash_per_instrument = context.portfolio.portfolio_value * context.max_cash_per_instrument
        for i, instrument in enumerate(buy_instruments):
            cash = cash_for_buy * buy_cash_weights[i]
            if cash > max_cash_per_instrument - positions.get(instrument, 0):
                # Make sure a single position never exceeds its maximum cash allocation
                cash = max_cash_per_instrument - positions.get(instrument, 0)
            if cash > 0:
                context.order_value(context.symbol(instrument), cash)
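    
    # Numeric illustration of the budget in m19_handle_data_bigquant_run above (hypothetical figures):
    # with portfolio_value = 1,000,000 and hold_days = 5, cash_avg = 200,000; during the first 5 trading
    # days at most cash_avg is spent per day, afterwards up to 1.5 * cash_avg = 300,000, always capped by
    # the cash actually available.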
    
    # Backtest engine: prepare data, executed only once
    def m19_prepare_bigquant_run(context):
        pass
    
    
    m1 = M.instruments.v2(
        start_date='2014-01-01',
        end_date='2017-12-31',
        market='CN_STOCK_A',
        instrument_list='',
        max_count=0
    )
    
    m2 = M.advanced_auto_labeler.v2(
        instruments=m1.data,
        label_expr="""# #号开始的表示注释
    # 0. 每行一个,顺序执行,从第二个开始,可以使用label字段
    # 1. 可用数据字段见 https://bigquant.com/docs/data_history_data.html
    #   添加benchmark_前缀,可使用对应的benchmark数据
    # 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/big_expr.html>`_
    
    # 计算收益:5日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
    shift(close, -5) / shift(open, -1)-1
    
    # 极值处理:用1%和99%分位的值做clip
    clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
    
    # 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
    where(shift(high, -1) == shift(low, -1), NaN, label)
    """,
        start_date='',
        end_date='',
        benchmark='000300.SHA',
        drop_na_label=True,
        cast_label_int=False
    )
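    
    # Numeric example of the label above (hypothetical prices): if tomorrow's open is 10.0 and the close
    # 5 days ahead is 10.8, the raw label is 10.8 / 10.0 - 1 = 0.08, which is then clipped at the 1%/99%
    # quantiles and standardized by the m29 module below.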
    
    m29 = M.standardlize.v8(
        input_1=m2.data,
        columns_input='label'
    )
    
    m3 = M.input_features.v1(
        features="""close_0
    open_0
    high_0
    low_0 
    amount_0
    turn_0 
    return_0
     
    close_1
    open_1
    high_1
    low_1
    return_1
    amount_1
    turn_1
     
    close_2
    open_2
    high_2
    low_2
    amount_2
    turn_2
    return_2
     
    close_3
    open_3
    high_3
    low_3
    amount_3
    turn_3
    return_3
     
    close_4
    open_4
    high_4
    low_4
    amount_4
    turn_4
    return_4
     
    mean(close_0, 5)
    mean(low_0, 5)
    mean(open_0, 5)
    mean(high_0, 5)
    mean(turn_0, 5)
    mean(amount_0, 5)
    mean(return_0, 5)
     
    ts_max(close_0, 5)
    ts_max(low_0, 5)
    ts_max(open_0, 5)
    ts_max(high_0, 5)
    ts_max(turn_0, 5)
    ts_max(amount_0, 5)
    ts_max(return_0, 5)
     
    ts_min(close_0, 5)
    ts_min(low_0, 5)
    ts_min(open_0, 5)
    ts_min(high_0, 5)
    ts_min(turn_0, 5)
    ts_min(amount_0, 5)
    ts_min(return_0, 5) 
     
    std(close_0, 5)
    std(low_0, 5)
    std(open_0, 5)
    std(high_0, 5)
    std(turn_0, 5)
    std(amount_0, 5)
    std(return_0, 5)
     
    ts_rank(close_0, 5)
    ts_rank(low_0, 5)
    ts_rank(open_0, 5)
    ts_rank(high_0, 5)
    ts_rank(turn_0, 5)
    ts_rank(amount_0, 5)
    ts_rank(return_0, 5)
     
    decay_linear(close_0, 5)
    decay_linear(low_0, 5)
    decay_linear(open_0, 5)
    decay_linear(high_0, 5)
    decay_linear(turn_0, 5)
    decay_linear(amount_0, 5)
    decay_linear(return_0, 5)
     
    correlation(volume_0, return_0, 5)
    correlation(volume_0, high_0, 5)
    correlation(volume_0, low_0, 5)
    correlation(volume_0, close_0, 5)
    correlation(volume_0, open_0, 5)
    correlation(volume_0, turn_0, 5)
      
    correlation(return_0, high_0, 5)
    correlation(return_0, low_0, 5)
    correlation(return_0, close_0, 5)
    correlation(return_0, open_0, 5)
    correlation(return_0, turn_0, 5)
     
    correlation(high_0, low_0, 5)
    correlation(high_0, close_0, 5)
    correlation(high_0, open_0, 5)
    correlation(high_0, turn_0, 5)
     
    correlation(low_0, close_0, 5)
    correlation(low_0, open_0, 5)
    correlation(low_0, turn_0, 5)
     
    correlation(close_0, open_0, 5)
    correlation(close_0, turn_0, 5)
    
    correlation(open_0, turn_0, 5)"""
    )
    
    m15 = M.general_feature_extractor.v7(
        instruments=m1.data,
        features=m3.data,
        start_date='',
        end_date='',
        before_start_days=0
    )
    
    m16 = M.derived_feature_extractor.v3(
        input_data=m15.data,
        features=m3.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=True,
        remove_extra_columns=False
    )
    
    m28 = M.standardlize.v8(
        input_1=m16.data,
        input_2=m3.data,
        columns_input='[]'
    )
    
    m13 = M.fillnan.v1(
        input_data=m28.data,
        features=m3.data,
        fill_value='0.0'
    )
    
    m7 = M.join.v3(
        data1=m29.data,
        data2=m13.data,
        on='date,instrument',
        how='inner',
        sort=False
    )
    
    m26 = M.dl_convert_to_bin.v2(
        input_data=m7.data,
        features=m3.data,
        window_size=1,
        feature_clip=3,
        flatten=True,
        window_along_col='instrument'
    )
    
    m10 = M.cached.v3(
        input_1=m26.data,
        run=m10_run_bigquant_run,
        post_run=m10_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports=''
    )
    
    m9 = M.instruments.v2(
        start_date=T.live_run_param('trading_date', '2018-01-01'),
        end_date=T.live_run_param('trading_date', '2022-08-10'),
        market='CN_STOCK_A',
        instrument_list="""000837.SZA
    002885.SZA
    002665.SZA
    002204.SZA
    000899.SZA
    600647.SHA
    600758.SHA
    600757.SHA
    600756.SHA
    600755.SHA
    000001.SZA
    600750.SHA
    600749.SHA
    600748.SHA
    600746.SHA
    600745.SHA
    600744.SHA
    600754.SHA
    600751.SHA
    600760.SHA
    000661.SZA
    003021.SZA
    600580.SHA
    605358.SHA
    002229.SZA
    002231.SZA
    603929.SHA
    688002.SHA
    600641.SHA
    002119.SZA
    688699.SHA
    603823.SHA
    688083.SHA
    002987.SZA
    001270.SZA
    002344.SZA
    002346.SZA
    603726.SHA
    002276.SZA
    002156.SZA
    """,
        max_count=0
    )
    
    m17 = M.general_feature_extractor.v7(
        instruments=m9.data,
        features=m3.data,
        start_date='',
        end_date='',
        before_start_days=0
    )
    
    m18 = M.derived_feature_extractor.v3(
        input_data=m17.data,
        features=m3.data,
        date_col='date',
        instrument_col='instrument',
        drop_na=True,
        remove_extra_columns=False
    )
    
    m25 = M.standardlize.v8(
        input_1=m18.data,
        input_2=m3.data,
        columns_input='[]'
    )
    
    m14 = M.fillnan.v1(
        input_data=m25.data,
        features=m3.data,
        fill_value='0.0'
    )
    
    m27 = M.dl_convert_to_bin.v2(
        input_data=m14.data,
        features=m3.data,
        window_size=1,
        feature_clip=3,
        flatten=True,
        window_along_col='instrument'
    )
    
    m6 = M.dl_layer_input.v1(
        shape='98',
        batch_shape='',
        dtype='float32',
        sparse=False,
        name=''
    )
    
    m12 = M.dl_layer_userlayer.v1(
        input1=m6.data,
        layer_class=m12_layer_class_bigquant_run,
        params="""{
        "num_features": 98, 
        "feature_columns": None,
        "feature_dim": 64,
        "output_dim": 32,
        "num_decision_steps": 3,
        "relaxation_factor": 1.3,
        "sparsity_coefficient": 1e-5,
        "norm_type": "group",
        "batch_momentum": 0.9,
        "virtual_batch_size": 128,
        "num_groups": 2,
        "epsilon": 1e-5
    }""",
        name=''
    )
    
    m23 = M.dl_layer_dense.v1(
        inputs=m12.data,
        units=1,
        activation='linear',
        use_bias=False,
        kernel_initializer='Zeros',
        bias_initializer='Zeros',
        kernel_regularizer='None',
        kernel_regularizer_l1=0,
        kernel_regularizer_l2=0,
        bias_regularizer='None',
        bias_regularizer_l1=0,
        bias_regularizer_l2=0,
        activity_regularizer='None',
        activity_regularizer_l1=0,
        activity_regularizer_l2=0,
        kernel_constraint='None',
        bias_constraint='None',
        name=''
    )
    
    m4 = M.dl_model_init.v1(
        inputs=m6.data,
        outputs=m23.data
    )
    
    m5 = M.dl_model_train.v1(
        input_model=m4.data,
        training_data=m10.data_1,
        validation_data=m10.data_2,
        optimizer='自定义',
        user_optimizer=m5_user_optimizer_bigquant_run,
        loss='mean_squared_error',
        metrics='mse',
        batch_size=10240,
        epochs=100,
        earlystop=m5_earlystop_bigquant_run,
        custom_objects=m5_custom_objects_bigquant_run,
        n_gpus=0,
        verbose='2:每个epoch输出一行记录',
        m_cached=False
    )
    
    m11 = M.dl_model_predict.v1(
        trained_model=m5.data,
        input_data=m27.data,
        batch_size=1024,
        n_gpus=0,
        verbose='2:每个epoch输出一行记录'
    )
    
    m24 = M.cached.v3(
        input_1=m11.data,
        input_2=m18.data,
        run=m24_run_bigquant_run,
        post_run=m24_post_run_bigquant_run,
        input_ports='',
        params='{}',
        output_ports=''
    )
    
    m19 = M.trade.v4(
        instruments=m9.data,
        options_data=m24.data_1,
        start_date='',
        end_date='',
        initialize=m19_initialize_bigquant_run,
        handle_data=m19_handle_data_bigquant_run,
        prepare=m19_prepare_bigquant_run,
        volume_limit=0.025,
        order_price_field_buy='open',
        order_price_field_sell='close',
        capital_base=1000000,
        auto_cancel_non_tradable_orders=True,
        data_frequency='daily',
        price_type='后复权',
        product_type='股票',
        plot_charts=True,
        backtest_only=False,
        benchmark='000300.SHA'
    )
    
    [TabNet]: 32 features will be used for decision steps.
    
    ---------------------------------------------------------------------------
    NotImplementedError                       Traceback (most recent call last)
    <ipython-input-3-9033dde4446f> in <module>
       1047 )
       1048 
    -> 1049 m12 = M.dl_layer_userlayer.v1(
       1050     input1=m6.data,
       1051     layer_class=m12_layer_class_bigquant_run,
    
    NotImplementedError: in user code:
    
        <ipython-input-1-2ca9988ca5e5>:527 call  *
            output_aggregated = tf.zeros([batch_size, self.output_dim])
        /usr/local/python3/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
            return target(*args, **kwargs)
        /usr/local/python3/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:2819 wrapped
            tensor = fun(*args, **kwargs)
        /usr/local/python3/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:2868 zeros
            output = _constant_if_small(zero, shape, dtype, name)
        /usr/local/python3/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:2804 _constant_if_small
            if np.prod(shape) < 1000:
        <__array_function__ internals>:180 prod
            
        /usr/local/python3/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3088 prod
            return _wrapreduction(a, np.multiply, 'prod', axis, dtype, out,
        /usr/local/python3/lib/python3.8/site-packages/numpy/core/fromnumeric.py:86 _wrapreduction
            return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
        /usr/local/python3/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:852 __array__
            raise NotImplementedError(
    
        NotImplementedError: Cannot convert a symbolic Tensor (tab_net_encoder_layer_1/strided_slice:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported
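
The NotImplementedError above occurs when tf.zeros([batch_size, self.output_dim]) receives the symbolic batch-size tensor and the installed TensorFlow build routes the shape through np.prod; this combination is known to fail with NumPy 1.20 and later. A possible workaround (an assumption, not verified against this exact environment) is to build the shape tensors explicitly inside TabNetEncoderLayer.call, for example:

    # Hypothetical patch: keep the symbolic batch size inside TensorFlow by stacking the shape,
    # so it never has to be converted to a NumPy array. Pinning NumPy below 1.20 (or upgrading
    # TensorFlow) is another commonly reported remedy.
    output_aggregated = tf.zeros(tf.stack([batch_size, self.output_dim]))
    mask_values = tf.zeros(tf.stack([batch_size, self.num_features]))
    aggregated_mask_values = tf.zeros(tf.stack([batch_size, self.num_features]))
    complementary_aggregated_mask_values = tf.ones(tf.stack([batch_size, self.num_features]))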