问答交流

lightgbm模型报错,但是stockranker可以,不知道什么问题

由csowen创建,最终由xuxiaoyin 被浏览 6 用户

#https://bigquant.com/codesharev3/9cd9a4a0-4050-40f2-b3a7-8b7652ebff63

from bigmodule import M
# @param(id="m7", name="initialize")
# 交易引擎:初始化函数,只执行一次
def m7_initialize_bigquant_run(context):
    from bigtrader.finance.commission import PerOrder
    # 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数
    context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
    context.hold_stock = 10
# @param(id="m7", name="before_trading_start")
# 交易引擎:每个单位时间开盘前调用一次。
def m7_before_trading_start_bigquant_run(context, data):
    # 盘前处理,订阅行情等
    pass

# @param(id="m7", name="handle_tick")
# 交易引擎:tick数据处理函数,每个tick执行一次
def m7_handle_tick_bigquant_run(context, tick):
    pass

# @param(id="m7", name="handle_data")
def m7_handle_data_bigquant_run(context, data):
    import pandas as pd
    # 下一个交易日不是调仓日,则不生成信号
    if not context.rebalance_period.is_signal_date(data.current_dt.date()):
        return
    # 从传入的数据 context.data 中读取今天的信号数据
    today_df = context.data[context.data["date"] == data.current_dt.strftime("%Y-%m-%d")]
    # 卖出不在目标持有列表中的股票
    count=0
    for instrument in sorted(set(context.get_account_positions().keys()) - set(today_df["instrument"])):
        context.order_target_percent(instrument, 0)
        #print ('sell',instrument,data.current_dt.strftime("%Y-%m-%d"))

    #买入目标持有列表中的股票
    count=0
    for i, x in today_df.iterrows():# buy-signal,top-6
        num = (0.0 if pd.isnull(x.position) else x.position)
        #print (num, i,x.instrument,data.current_dt.strftime("%Y-%m-%d"))
        context.order_target_percent(x.instrument, 0.0 if pd.isnull(x.position) else x.position)

# @param(id="m7", name="handle_trade")
# 交易引擎:成交回报处理函数,每个成交发生时执行一次
def m7_handle_trade_bigquant_run(context, trade):
    pass

# @param(id="m7", name="handle_order")
# 交易引擎:委托回报处理函数,每个委托变化时执行一次
def m7_handle_order_bigquant_run(context, order):
    pass

# @param(id="m7", name="after_trading")
# 交易引擎:盘后处理函数,每日盘后执行一次
def m7_after_trading_bigquant_run(context, data):
    pass


#-- 数据使用: 表名.字段名, 对于没有指定表名的列, 会从 expr_tables 推断, 如果同名字段在多个表中出现, 需要显式的给出表名
#-- cn_stock_prefactors https://bigquant.com/data/datas
#pe_ttm  -- 滚动市盈率,即股价除以过去12个月的每股收益
#pb      -- 市净率
#ps_ttm  -- 滚动市销率
#pcf_net_leading -- 现金流比率, 股价与​​预测净现金流​​的比值(前瞻性)
#pcf_op_ttm -- 股价与​​过去12个月经营现金流​​的比值(历史性)
#total_market_cap -- 总市值 = 总股本 × 股价。
#float_market_cap -- 流通市值 = 流通股 × 股价。
#dividend_yield_ratio -- 股息率,= 年度分红总额 / 总市值 × 100%
#momentum_5 -- 过去5日股价涨幅,反映短期趋势强度
#reversal_5 -- 过去5日股价跌幅,捕捉超卖反弹机会。
#volatility_5 --过去5日收益率的标准差,衡量股价波动幅度,σ = √[∑(每日收益率 - 平均收益率)² / 5]
#turn -- 换手率,​​日成交量 / 流通股本​​,反映股票活跃度与资金关注度
m0 = M.input_features_dai.v30(
    mode="""SQL""",
    sql="""-- 使用DAI SQL获取数据, 构建因子等, 
-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B
-- 使用数据输入1/2/3里的字段: e.g. input_1.close, input_1.* EXCLUDE(date, instrument)
SELECT
    -- 在这里输入因子表达式
    -- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
    -- 数据&字段: 数据文档 https://bigquant.com/data/home    
    m_lag(cn_stock_bar1d.close, 5) / cn_stock_bar1d.close AS return_5,
    m_lag(cn_stock_bar1d.close, 10) / cn_stock_bar1d.close AS return_10,
    c_pct_rank(return_10) AS rank_return_10,
    c_pct_rank(return_5) AS rank_return_5,
    cn_stock_bar1d.close / m_lag(cn_stock_bar1d.close, 1) AS return_0,
    return_0 / rank_return_5 AS  first_a,
    rank_return_5 / rank_return_10 AS first_b,
    -- rank_return_10, 
    -- rank_return_5, 
    volatility_5, 
    -- 日期和股票代码
    date, instrument
FROM cn_stock_bar1d
JOIN cn_stock_prefactors USING (date, instrument)
WHERE
    -- WHERE 过滤, 在窗口等计算算子之前执行
    -- 剔除ST股票
    st_status = 0
    AND list_days > 260
    -- AND is_risk_warning = 0 -- 剔除那些被监管机构标记为风险警示的股票,进一步降低投资组合的风险。
    -- AND cn_stock_bar1d.close/cn_stock_bar1d.adjust_factor > 2
QUALIFY
    -- QUALIFY 过滤, 在窗口等计算算子之后执行, 比如 m_lag(close, 3) AS close_3, 对于 close_3 的过滤需要放到这里
    -- 去掉有空值的行    
    COLUMNS(*) IS NOT NULL
--  按日期和股票代码排序, 从小到大
ORDER BY date, instrument
    """,
    extra_fields="date, instrument",
    order_by="date, instrument",
    m_name="""m0"""
)

#add--label
# @module(position="-258,-25", comment="""加数据标注""")
m2 = M.input_features_dai.v30(
    input_1=m0.data,
    mode="""表达式""",
    expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 数据&字段: 数据文档 https://bigquant.com/data/home
-- 数据使用: 表名.字段名, 对于没有指定表名的列, 会从 expr_tables 推断, 如果同名字段在多个表中出现, 需要显式的给出表名
-- 从输入数据 input_1 中选择所有列,但排除 date 和 instrument 这两列。
input_1.* EXCLUDE(date, instrument) 
-- 计算股票标签,使用 m_lead 函数计算未来收益率。m_lead(open, 5) 获取未来第 5 天的开盘价,
m_lead(close, 2) / m_lead(open, 1) AS _future_return
c_quantile_cont(_future_return, 0.01) AS _future_return_1pct
c_quantile_cont(_future_return, 0.99) AS _future_return_99pct
clip(_future_return, _future_return_1pct, _future_return_99pct) AS _clipped_return
c_cbins(_clipped_return, 20) AS label 
""",
    expr_filters="""m_lead(high, 1) != m_lead(low, 1)""",
    expr_tables="""cn_stock_prefactors""",
    extra_fields="""date, instrument""",#指定除了表达式计算的列之外,还需要额外保留的字段。这里保留了 date(日期)和 instrument(股票代码)字段
    order_by="""date, instrument""",
    expr_drop_na=True,
    sql=""" """,
    extract_data=False,
    m_name="""m2"""
)

#training
# @module(position="-244,78", comment="""抽取训练数据""")
m3 = M.extract_data_dai.v17(
    sql=m2.data,##training data
    start_date="""2010-01-01""",
    start_date_bound_to_trading_date=False,
    end_date="""2015-01-01""",
    end_date_bound_to_trading_date=False,
    before_start_days=90,
    debug=False,
    m_name="""m3"""
)

#prediction,
# @module(position="105,2", comment="""抽取预测数据""")
m4 = M.extract_data_dai.v17(
    sql=m0.data,## prediction 
    start_date="""2025-01-01""",
    start_date_bound_to_trading_date=True,
    end_date="""2025-07-22""",
    end_date_bound_to_trading_date=True,
    before_start_days=90,
    debug=False,
    m_name="""m4"""
)

# stock-ranker
# @module(position="-126,211", comment="""模型训练""")
m5_stockranker = M.stockranker.v9(
    train_data=m3.data,
    predict_data=m4.data,
    learning_algorithm="""排序""",
    number_of_leaves=30,
    min_docs_per_leaf=1000,
    number_of_trees=20,
    learning_rate=0.1,
    max_bins=1023,
    feature_fraction=1,
    data_row_fraction=1,
    sort_by="""date,instrument""",
    plot_charts=True,
    ndcg_discount_base=1,
    m_name="""m5_stockranker"""
)

#随机森林
## @module(position="66,119", comment="""""", comment_collapsed=True)
m5_random = M.randomforest.v2(
    input_1=m3.data,
    input_2=m4.data,
    n_estimators=10,
    criterion="""squared_error(回归)""",
    max_depth=20,
    min_samples_split=2,
    min_samples_leaf=2,
    max_features=0.9,
    random=0,
    render_chart=True,
    m_name="""m5_random"""
)

m_lightgbm = M.lightgbm.v4(
    training_ds=m3.data,
    predict_ds=m4.data,
    objective="""回归""",
    num_boost_round=30,
    num_class=1,
    num_leaves=10,
    learning_rate=0.4,
    min_data_in_leaf=1000,
    max_bin=20,
    key_cols="""date,instrument""",
    group_col="""date""",
    random_seed=101,
    log_level="""信息""",
    plot_charts=True,
    m_name="""m_lightgbm"""
)

# @module(position="-404,-613", comment="""""", comment_collapsed=True)
#m5_xgboost = M.xgboost.v6(
#    train_ds=m3.data,
#    test_ds=m4.data,
#    #objective="""排序学习-ranknet""",
#    objective="""排序(pairwise)""",
#    number_of_leaves=30,
#    min_docs_per_leaf=1,
#    number_of_trees=6,
#    number_of_depth=5,
#    learning_rate=0.1,
#    max_bins=1023,
#    feature_fraction=1,
#    data_row_fraction=1,
#    params="""{'tree_method':'approx','learning_rat':0.1,'subsample':0.7}""",
#    params="""""",
#    log_level="""信息""",
#    plot_charts=True,
#    use_gpu=False,
#    m_name="""m5_xgboost"""
#)

#@module(position="-81,302", comment="""等权分配""")
m6 = M.score_to_position.v3(
    input_1=m_lightgbm.predictions,
    score_field="""score DESC""",
    hold_count=5,##选择股票数量
    position_expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 在这里输入表达式, 每行一个表达式, 输出仓位字段必须命名为 position, 模块会进一步做归一化
-- 排序倒数: 1 / score_rank AS position
-- 对数下降: 1 / log2(score_rank + 1) AS position
-- TODO 拟合、最优化 ..
-- 等权重分配,股票购买额度计算,每个股票0.1
1 AS position
""",
    total_position=1,#总仓位,100%
    extract_data=True,
    m_name="""m6"""
)

# @module(position="-106,405", comment="""交易,日线,设置初始化函数和K线处理函数,以及初始化资金、基准等""")
m7 = M.bigtrader.v25(
    data=m6.data,
    start_date="""""",
    end_date="""""",
    initialize=m7_initialize_bigquant_run,
    before_trading_start=m7_before_trading_start_bigquant_run,
    handle_tick=m7_handle_tick_bigquant_run,
    handle_data=m7_handle_data_bigquant_run,
    handle_trade=m7_handle_trade_bigquant_run,
    handle_order=m7_handle_order_bigquant_run,
    after_trading=m7_after_trading_bigquant_run,
    capital_base=1000000,
    frequency="""daily""",
    product_type="""股票""",
    rebalance_period_type="""交易日""",
    rebalance_period_days="""5""",
    rebalance_period_roll_forward=True,
    backtest_engine_mode="""标准模式""",
    before_start_days=20,
    volume_limit=1,
    order_price_field_buy="""open""",
    order_price_field_sell="""close""",
    benchmark="""沪深300指数""",
    plot_charts=True,
    debug=False,
    backtest_only=False,
    m_name="""m7""",
    m_cached=False
)
# </aistudiograph>


现在报错
[2025-07-24 13:04:11] INFO: lightgbm.v4 命中缓存[2025-07-24 13:04:12] INFO: lightgbm.v4 运行完成 [0.507s].[2025-07-24 13:04:12] INFO: score_to_position.v3 开始运行 ..[2025-07-24 13:04:12] INFO: extract data ..[2025-07-24 13:04:12] ERROR: dai query failed: WITH _t_f5c0ca36b9d44df1b972570c6c6beeea AS (
SELECT
    _30bcf3415d6b4ae1b930b3a7b15fd82c.* EXCLUDE(position),
    ROW_NUMBER() OVER (PARTITION BY date ORDER BY score DESC, instrument) AS score_rank
FROM _30bcf3415d6b4ae1b930b3a7b15fd82c
QUALIFY
    score_rank <= 5
),
_t_f46351c7064e4ec3a06395cb30da600f AS (
SELECT
    _t_f5c0ca36b9d44df1b972570c6c6beeea.* EXCLUDE(position),
    1 AS position
FROM _30bcf3415d6b4ae1b930b3a7b15fd82c
    JOIN _t_f5c0ca36b9d44df1b972570c6c6beeea USING(date, instrument)
ORDER BY date, position
)
SELECT
    * EXCLUDE(position),
    c_sum(position) AS _position_sum,
    position * 1 / CASE WHEN _position_sum = 0 THEN 1 ELSE _position_sum END AS position
FROM _t_f46351c7064e4ec3a06395cb30da600f
QUALIFY
    position IS NOT NULL
ORDER BY date, position
您可以去社区论坛问答交流板块反馈咨询 去发帖>>
---------------------------------------------------------------------------
InvalidInputException                     Traceback (most recent call last)
Cell In[18], line 242
    201 m_lightgbm = M.lightgbm.v4(
    202     training_ds=m3.data,
    203     predict_ds=m4.data,
   (...)
    216     m_name="""m_lightgbm"""
    217 )
    219 # @module(position="-404,-613", comment="""""", comment_collapsed=True)
    220 #m5_xgboost = M.xgboost.v6(
    221 #    train_ds=m3.data,
   (...)
    240 
    241 #@module(position="-81,302", comment="""等权分配""")
--> 242 m6 = M.score_to_position.v3(
    243     input_1=m_lightgbm.predictions,
    244     score_field="""score DESC""",
    245     hold_count=5,##选择股票数量
    246     position_expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
    247 -- 在这里输入表达式, 每行一个表达式, 输出仓位字段必须命名为 position, 模块会进一步做归一化
    248 -- 排序倒数: 1 / score_rank AS position
    249 -- 对数下降: 1 / log2(score_rank + 1) AS position
    250 -- TODO 拟合、最优化 ..
    251 -- 等权重分配,股票购买额度计算,每个股票0.1
    252 1 AS position
    253 """,
    254     total_position=1,#总仓位,100%
    255     extract_data=True,
    256     m_name="""m6"""
    257 )
    259 # @module(position="-106,405", comment="""交易,日线,设置初始化函数和K线处理函数,以及初始化资金、基准等""")
    260 m7 = M.bigtrader.v25(
    261     data=m6.data,
    262     start_date="""""",
   (...)
    287     m_cached=False
    288 )

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/modules.py:28, in __call__(self, **kwargs)

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:203, in module_invoke(name, version, kwargs)

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:169, in _module_invoke(name, version, kwargs)

File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:41, in _module_run(module, kwargs)

File dist/build/score_to_position/v3/__init__.py:236, in v3.run()

File dist/build/score_to_position/v3/__init__.py:201, in v3._create_ds_from_sql()

File dist/build/score_to_position/v3/__init__.py:198, in v3._create_ds_from_sql()

File /var/app/enabled/dai/_telemetry.py:128, in wrapper(*args, **kwargs)

File /var/app/enabled/dai/_functions.py:152, in df(self)

InvalidInputException: Invalid Input Error: Attempting to execute an unsuccessful or closed pending query result
Error: Binder Error: Referenced column "score" not found in FROM clause!
Candidate bindings: "instrument", "prediction"

LINE 1: ..."), row_number() OVER (PARTITION BY date ORDER BY score DESC, instrument) AS score_rank FROM _30bcf3415d6b4ae1b930b3...
                                                                    ^
StockRanker 不会报错,不知道什么问题

\

评论
  • stockranker给出的预测结果是有score字段的,建议您打印一下lightgbm的预测数据,它里面应该是没有score字段的
{link}