问答交流

使用封装的xgboost视图报错

由 jliu15 创建, 最终由 xuxiaoyin 更新, 被 2 位用户浏览

报错信息和代码如下, 请问我该如何修改才能不报错?

--------------------------------------------------------------------------- XGBoostError Traceback (most recent call last) Cell In[13], line 98 85 m3 = M.extract_data_dai.v20( 86 sql=m1.data, 87 start_date="""2025-07-01""", (...) 94 m_name="""m3""" 95 ) 97 # @module(position="-107.83115196228027,118.96104431152344", comment="""""", comment_collapsed=True) ---> 98 m7 = M.xgboost.v6( 99 train_ds=m9.data, 100 test_ds=m3.data, 101 objective="""排序学习-NDCG""", 102 number_of_leaves=30, 103 min_docs_per_leaf=1, 104 number_of_trees=5, 105 number_of_depth=6, 106 learning_rate=0.1, 107 max_bins=1023, 108 feature_fraction=1, 109 data_row_fraction=1, 110 log_level="""信息""", 111 plot_charts=True, 112 use_gpu=False, 113 m_name="""m7""" 114 ) 116 # @module(position="-119,206", comment="""等权分配""") 117 m6 = M.score_to_position.v4( 118 input_1=m7.predictions, 119 input_3=m7.model_pack, (...) 125 m_name="""m6""" 126 ) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/modules.py:28, in call(self, **kwargs) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:203, in module_invoke(name, version, kwargs) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:169, in _module_invoke(name, version, kwargs) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:41, in _module_run(module, kwargs) File dist/build/xgboost/v6/init.py:261, in v6.run() File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/modules.py:28, in call(self, **kwargs) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:203, in module_invoke(name, version, kwargs) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:169, in _module_invoke(name, version, kwargs) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:41, in _module_run(module, kwargs) File 
dist/build/python/v1/init.py:54, in v1.run() File dist/build/xgboost/v6/init.py:139, in v6._train() File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/xgboost/core.py:620, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs) 618 for k, arg in zip(sig.parameters, args): 619 kwargs[k] = arg --> 620 return func(**kwargs) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/xgboost/core.py:743, in DMatrix.init(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical) 740 assert self.handle is not None 741 return --> 743 handle, feature_names, feature_types = dispatch_data_backend( 744 data, 745 missing=self.missing, 746 threads=self.nthread, 747 feature_names=feature_names, 748 feature_types=feature_types, 749 enable_categorical=enable_categorical, 750 ) 751 assert handle is not None 752 self.handle = handle File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/xgboost/data.py:957, in dispatch_data_backend(data, missing, threads, feature_names, feature_types, enable_categorical) 955 return _from_tuple(data, missing, threads, feature_names, feature_types) 956 if _is_pandas_df(data): --> 957 return _from_pandas_df(data, enable_categorical, missing, threads, 958 feature_names, feature_types) 959 if _is_pandas_series(data): 960 return _from_pandas_series( 961 data, missing, threads, enable_categorical, feature_names, feature_types 962 ) File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/xgboost/data.py:407, in _from_pandas_df(data, enable_categorical, missing, nthread, feature_names, feature_types) 396 def _from_pandas_df( 397 data: DataFrame,\n

from bigmodule import M

# <aistudiograph>

# Feature-definition node: in expression mode, selects four Alpha-101 factor
# columns (alpha_001 .. alpha_004) from cn_stock_factors_alpha_101.
# Its output, m1.data, is consumed by m2 (as input_1) and by m3 (as sql).
# NOTE(review): the "# @module(...)" line below looks like layout metadata read
# by the graph editor — keep it unchanged; confirm before editing it.
# @module(position="-85.07792663574219,-168.0779266357422", comment="""因子特征,用表达式构建因子""")
m1 = M.input_features_dai.v30(
    mode="""表达式""",
    expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 数据&字段: 数据文档 https://bigquant.com/data/home
-- 数据使用: 表名.字段名, 对于没有指定表名的列, 会从 expr_tables 推断, 如果同名字段在多个表中出现, 需要显式的给出表名

--    input_1.* EXCLUDE(date, instrument)
    cn_stock_factors_alpha_101.alpha_001
    cn_stock_factors_alpha_101.alpha_002
    cn_stock_factors_alpha_101.alpha_003
    cn_stock_factors_alpha_101.alpha_004


-- cn_stock_bar1d.close / cn_stock_bar1d.open
-- cn_stock_prefactors https://bigquant.com/data/datasources/cn_stock_prefactors 是常用因子表(VIEW), JOIN了很多数据表, 性能会比直接用相关表慢一点, 但使用简单
-- cn_stock_prefactors.pe_ttm

-- 表达式模式下, 会自动join输入数据1/2/3, 可以在表达式里直接使用其字段。包括 input_1 的所有列但去掉 date, instrument。注意字段不能有重复的, 否则会报错
-- input_1.* EXCLUDE(date, instrument)
-- input_1.close
-- input_2.close / input_1.close
""",
    expr_filters="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 数据&字段: 数据文档 https://bigquant.com/data/home
-- 表达式模式的过滤都是放在 QUALIFY 里, 即数据查询、计算, 最后才到过滤条件

-- c_pct_rank(-return_90) <= 0.3
-- c_pct_rank(return_30) <= 0.3
-- cn_stock_bar1d.turn > 0.02
""",  # every line in this string is an SQL comment, so no filter expression is supplied
    expr_tables="""cn_stock_real_bar1d;cn_stock_factors_alpha_101""",
    extra_fields="""date,instrument""",
    order_by="""date,instrument""",
    expr_drop_na=False,  # presumably keeps rows with missing values (m2 sets this to True) — confirm
    extract_data=False,
    m_name="""m1"""
)

# Labeling node: takes every m1 column except date/instrument and adds a
# `label` column computed as
#   (m_lead(close, 5) - m_lead(open, 1)) * 100 / m_lead(open, 1).
# NOTE(review): presumably this is the percentage return from the next bar's
# open to the close five bars ahead — confirm m_lead semantics in the DAI docs.
# NOTE(review): the label is a continuous value; m7 trains a ranking (NDCG)
# objective on it — confirm whether the label must be binned into
# non-negative integer grades first.
# Its output, m2.data, is consumed by m9 (as sql).
# @module(position="-253,-59", comment="""加数据标注""")
m2 = M.input_features_dai.v30(
    input_1=m1.data,
    mode="""表达式""",
    expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 数据&字段: 数据文档 https://bigquant.com/data/home
-- 数据使用: 表名.字段名, 对于没有指定表名的列, 会从 expr_tables 推断, 如果同名字段在多个表中出现, 需要显式的给出表名

input_1.* EXCLUDE(date, instrument)
(m_lead(close, 5) - m_lead(open, 1)) * 100 / m_lead(open, 1) AS label
""",
    expr_filters="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 数据&字段: 数据文档 https://bigquant.com/data/home
-- 表达式模式的过滤都是放在 QUALIFY 里, 即数据查询、计算, 最后才到过滤条件

-- c_pct_rank(-return_90) <= 0.3
-- c_pct_rank(return_30) <= 0.3
-- cn_stock_bar1d.turn > 0.02
""",  # every line in this string is an SQL comment, so no filter expression is supplied
    expr_tables="""cn_stock_real_bar1d;cn_stock_factors_alpha_101""",
    extra_fields="""date, instrument""",
    order_by="""date, instrument""",
    expr_drop_na=True,  # presumably drops rows with missing values, unlike m1 — confirm
    extract_data=False,
    m_name="""m2"""
)

# Training-data extraction node: runs the labeled query from m2 over
# 2024-01-01 .. 2025-06-30. Its output, m9.data, is passed to m7 as train_ds.
# @module(position="-254,38", comment="""""", comment_collapsed=True)
m9 = M.extract_data_dai.v20(
    sql=m2.data,
    start_date="""2024-01-01""",
    start_date_bound_to_trading_date=True,  # note: m3 passes False for both *_bound_to_trading_date flags
    end_date="""2025-06-30""",
    end_date_bound_to_trading_date=True,
    before_start_days=10,  # presumably extra look-back days loaded before start_date — confirm
    keep_before=False,
    debug=False,
    m_name="""m9"""
)

# Prediction-data extraction node: runs the feature-only query from m1 (not
# the labeled m2 query) over 2025-07-01 .. 2025-08-15. Its output, m3.data, is
# passed to m7 as test_ds.
# NOTE(review): because this reads m1 rather than m2, the result presumably has
# no `label` column — confirm that the xgboost module accepts an unlabeled
# test_ds for the chosen objective.
# @module(position="66,-56", comment="""""", comment_collapsed=True)
m3 = M.extract_data_dai.v20(
    sql=m1.data,
    start_date="""2025-07-01""",
    start_date_bound_to_trading_date=False,  # differs from m9, which passes True
    end_date="""2025-08-15""",
    end_date_bound_to_trading_date=False,  # differs from m9, which passes True
    before_start_days=10,  # presumably extra look-back days loaded before start_date — confirm
    keep_before=False,
    debug=False,
    m_name="""m3"""
)

# Model node: trains the packaged xgboost module on m9.data and predicts on
# m3.data. Its outputs m7.predictions and m7.model_pack are consumed by m6.
# The traceback quoted with this script is raised from this call, inside
# v6._train(), while xgboost builds a DMatrix from a pandas DataFrame
# (dispatch_data_backend -> _from_pandas_df).
# NOTE(review): the quoted traceback is cut off before the final error message,
# so the root cause is not visible here. Things to check first:
#   * column dtypes of the training frame — xgboost's pandas path may reject
#     non-numeric columns; confirm how date/instrument are handled upstream;
#   * the objective below is a ranking (NDCG) objective while m2 defines a
#     continuous percentage-return label — confirm whether the label must be
#     discretized into non-negative integer grades;
#   * whether the four alpha_00x feature columns have usable numeric values
#     over the selected date ranges.
# @module(position="-107.83115196228027,118.96104431152344", comment="""""", comment_collapsed=True)
m7 = M.xgboost.v6(
    train_ds=m9.data,
    test_ds=m3.data,
    objective="""排序学习-NDCG""",
    number_of_leaves=30,
    min_docs_per_leaf=1,
    number_of_trees=5,
    number_of_depth=6,
    learning_rate=0.1,
    max_bins=1023,
    feature_fraction=1,
    data_row_fraction=1,
    log_level="""信息""",
    plot_charts=True,
    use_gpu=False,
    m_name="""m7"""
)

# Position-sizing node: converts the model output from m7 into target
# positions. Its output, m6.data, is passed to the backtest node m4.
# NOTE(review): presumably ranks by `score` descending, holds the top 10
# instruments and spreads a total position of 1 equally (the editor comment
# says "equal-weight allocation") — confirm against the module documentation.
# @module(position="-119,206", comment="""等权分配""")
m6 = M.score_to_position.v4(
    input_1=m7.predictions,
    input_3=m7.model_pack,
    score_field="""score DESC""",
    hold_count=10,
    total_position=1,
    extract_data=True,
    m_name="""m6"""
)

# Backtest node: runs the bigtrader engine on the positions produced by m6,
# with an initial capital of 1,000,000 at daily frequency.
# @module(position="-148,335", comment="""""", comment_collapsed=True)
m4 = M.bigtrader.v47(
    data=m6.data,
    start_date="""""",  # empty string; presumably the date range is taken from `data` — confirm
    end_date="""""",  # empty string; see start_date
    capital_base=1000000,
    frequency="""daily""",
    product_type="""股票""",
    rebalance_period_type="""交易日""",
    rebalance_period_days="""1""",  # passed as a string, not an int
    rebalance_period_roll_forward=True,
    backtest_engine_mode="""标准模式""",
    before_start_days=0,
    volume_limit=1,
    order_price_field_buy="""open""",  # buy orders are priced on the `open` field
    order_price_field_sell="""close""",  # sell orders are priced on the `close` field
    benchmark="""沪深300指数""",
    plot_charts=True,
    debug=False,
    backtest_only=False,
    m_name="""m4"""
)
# </aistudiograph>

\

标签

XGBoost, Python
评论
  • 你挑选的特征有点问题: alpha101 因子只取有限种类的值。
{link}