lightgbm模型报错,但是stockranker可以,不知道什么问题
由csowen创建,最终由xuxiaoyin 被浏览 6 用户
#https://bigquant.com/codesharev3/9cd9a4a0-4050-40f2-b3a7-8b7652ebff63
from bigmodule import M
# @param(id="m7", name="initialize")
# 交易引擎:初始化函数,只执行一次
def m7_initialize_bigquant_run(context):
from bigtrader.finance.commission import PerOrder
# 系统已经设置了默认的交易手续费和滑点,要修改手续费可使用如下函数
context.set_commission(PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5))
context.hold_stock = 10
# @param(id="m7", name="before_trading_start")
# 交易引擎:每个单位时间开盘前调用一次。
def m7_before_trading_start_bigquant_run(context, data):
# 盘前处理,订阅行情等
pass
# @param(id="m7", name="handle_tick")
# 交易引擎:tick数据处理函数,每个tick执行一次
def m7_handle_tick_bigquant_run(context, tick):
pass
# @param(id="m7", name="handle_data")
def m7_handle_data_bigquant_run(context, data):
import pandas as pd
# 下一个交易日不是调仓日,则不生成信号
if not context.rebalance_period.is_signal_date(data.current_dt.date()):
return
# 从传入的数据 context.data 中读取今天的信号数据
today_df = context.data[context.data["date"] == data.current_dt.strftime("%Y-%m-%d")]
# 卖出不在目标持有列表中的股票
count=0
for instrument in sorted(set(context.get_account_positions().keys()) - set(today_df["instrument"])):
context.order_target_percent(instrument, 0)
#print ('sell',instrument,data.current_dt.strftime("%Y-%m-%d"))
#买入目标持有列表中的股票
count=0
for i, x in today_df.iterrows():# buy-signal,top-6
num = (0.0 if pd.isnull(x.position) else x.position)
#print (num, i,x.instrument,data.current_dt.strftime("%Y-%m-%d"))
context.order_target_percent(x.instrument, 0.0 if pd.isnull(x.position) else x.position)
# @param(id="m7", name="handle_trade")
# 交易引擎:成交回报处理函数,每个成交发生时执行一次
def m7_handle_trade_bigquant_run(context, trade):
pass
# @param(id="m7", name="handle_order")
# 交易引擎:委托回报处理函数,每个委托变化时执行一次
def m7_handle_order_bigquant_run(context, order):
pass
# @param(id="m7", name="after_trading")
# 交易引擎:盘后处理函数,每日盘后执行一次
def m7_after_trading_bigquant_run(context, data):
pass
#-- 数据使用: 表名.字段名, 对于没有指定表名的列, 会从 expr_tables 推断, 如果同名字段在多个表中出现, 需要显式的给出表名
#-- cn_stock_prefactors https://bigquant.com/data/datas
#pe_ttm -- 滚动市盈率,即股价除以过去12个月的每股收益
#pb -- 市净率
#ps_ttm -- 滚动市销率
#pcf_net_leading -- 现金流比率, 股价与预测净现金流的比值(前瞻性)
#pcf_op_ttm -- 股价与过去12个月经营现金流的比值(历史性)
#total_market_cap -- 总市值 = 总股本 × 股价。
#float_market_cap -- 流通市值 = 流通股 × 股价。
#dividend_yield_ratio -- 股息率,= 年度分红总额 / 总市值 × 100%
#momentum_5 -- 过去5日股价涨幅,反映短期趋势强度
#reversal_5 -- 过去5日股价跌幅,捕捉超卖反弹机会。
#volatility_5 --过去5日收益率的标准差,衡量股价波动幅度,σ = √[∑(每日收益率 - 平均收益率)² / 5]
#turn -- 换手率,日成交量 / 流通股本,反映股票活跃度与资金关注度
m0 = M.input_features_dai.v30(
mode="""SQL""",
sql="""-- 使用DAI SQL获取数据, 构建因子等,
-- DAI SQL 语法: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-sql%E5%85%A5%E9%97%A8%E6%95%99%E7%A8%8B
-- 使用数据输入1/2/3里的字段: e.g. input_1.close, input_1.* EXCLUDE(date, instrument)
SELECT
-- 在这里输入因子表达式
-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 数据&字段: 数据文档 https://bigquant.com/data/home
m_lag(cn_stock_bar1d.close, 5) / cn_stock_bar1d.close AS return_5,
m_lag(cn_stock_bar1d.close, 10) / cn_stock_bar1d.close AS return_10,
c_pct_rank(return_10) AS rank_return_10,
c_pct_rank(return_5) AS rank_return_5,
cn_stock_bar1d.close / m_lag(cn_stock_bar1d.close, 1) AS return_0,
return_0 / rank_return_5 AS first_a,
rank_return_5 / rank_return_10 AS first_b,
-- rank_return_10,
-- rank_return_5,
volatility_5,
-- 日期和股票代码
date, instrument
FROM cn_stock_bar1d
JOIN cn_stock_prefactors USING (date, instrument)
WHERE
-- WHERE 过滤, 在窗口等计算算子之前执行
-- 剔除ST股票
st_status = 0
AND list_days > 260
-- AND is_risk_warning = 0 -- 剔除那些被监管机构标记为风险警示的股票,进一步降低投资组合的风险。
-- AND cn_stock_bar1d.close/cn_stock_bar1d.adjust_factor > 2
QUALIFY
-- QUALIFY 过滤, 在窗口等计算算子之后执行, 比如 m_lag(close, 3) AS close_3, 对于 close_3 的过滤需要放到这里
-- 去掉有空值的行
COLUMNS(*) IS NOT NULL
-- 按日期和股票代码排序, 从小到大
ORDER BY date, instrument
""",
extra_fields="date, instrument",
order_by="date, instrument",
m_name="""m0"""
)
#add--label
# @module(position="-258,-25", comment="""加数据标注""")
m2 = M.input_features_dai.v30(
input_1=m0.data,
mode="""表达式""",
expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 数据&字段: 数据文档 https://bigquant.com/data/home
-- 数据使用: 表名.字段名, 对于没有指定表名的列, 会从 expr_tables 推断, 如果同名字段在多个表中出现, 需要显式的给出表名
-- 从输入数据 input_1 中选择所有列,但排除 date 和 instrument 这两列。
input_1.* EXCLUDE(date, instrument)
-- 计算股票标签,使用 m_lead 函数计算未来收益率。m_lead(open, 5) 获取未来第 5 天的开盘价,
m_lead(close, 2) / m_lead(open, 1) AS _future_return
c_quantile_cont(_future_return, 0.01) AS _future_return_1pct
c_quantile_cont(_future_return, 0.99) AS _future_return_99pct
clip(_future_return, _future_return_1pct, _future_return_99pct) AS _clipped_return
c_cbins(_clipped_return, 20) AS label
""",
expr_filters="""m_lead(high, 1) != m_lead(low, 1)""",
expr_tables="""cn_stock_prefactors""",
extra_fields="""date, instrument""",#指定除了表达式计算的列之外,还需要额外保留的字段。这里保留了 date(日期)和 instrument(股票代码)字段
order_by="""date, instrument""",
expr_drop_na=True,
sql=""" """,
extract_data=False,
m_name="""m2"""
)
#training
# @module(position="-244,78", comment="""抽取训练数据""")
m3 = M.extract_data_dai.v17(
sql=m2.data,##training data
start_date="""2010-01-01""",
start_date_bound_to_trading_date=False,
end_date="""2015-01-01""",
end_date_bound_to_trading_date=False,
before_start_days=90,
debug=False,
m_name="""m3"""
)
#prediction,
# @module(position="105,2", comment="""抽取预测数据""")
m4 = M.extract_data_dai.v17(
sql=m0.data,## prediction
start_date="""2025-01-01""",
start_date_bound_to_trading_date=True,
end_date="""2025-07-22""",
end_date_bound_to_trading_date=True,
before_start_days=90,
debug=False,
m_name="""m4"""
)
# stock-ranker
# @module(position="-126,211", comment="""模型训练""")
m5_stockranker = M.stockranker.v9(
train_data=m3.data,
predict_data=m4.data,
learning_algorithm="""排序""",
number_of_leaves=30,
min_docs_per_leaf=1000,
number_of_trees=20,
learning_rate=0.1,
max_bins=1023,
feature_fraction=1,
data_row_fraction=1,
sort_by="""date,instrument""",
plot_charts=True,
ndcg_discount_base=1,
m_name="""m5_stockranker"""
)
#随机森林
## @module(position="66,119", comment="""""", comment_collapsed=True)
m5_random = M.randomforest.v2(
input_1=m3.data,
input_2=m4.data,
n_estimators=10,
criterion="""squared_error(回归)""",
max_depth=20,
min_samples_split=2,
min_samples_leaf=2,
max_features=0.9,
random=0,
render_chart=True,
m_name="""m5_random"""
)
m_lightgbm = M.lightgbm.v4(
training_ds=m3.data,
predict_ds=m4.data,
objective="""回归""",
num_boost_round=30,
num_class=1,
num_leaves=10,
learning_rate=0.4,
min_data_in_leaf=1000,
max_bin=20,
key_cols="""date,instrument""",
group_col="""date""",
random_seed=101,
log_level="""信息""",
plot_charts=True,
m_name="""m_lightgbm"""
)
# @module(position="-404,-613", comment="""""", comment_collapsed=True)
#m5_xgboost = M.xgboost.v6(
# train_ds=m3.data,
# test_ds=m4.data,
# #objective="""排序学习-ranknet""",
# objective="""排序(pairwise)""",
# number_of_leaves=30,
# min_docs_per_leaf=1,
# number_of_trees=6,
# number_of_depth=5,
# learning_rate=0.1,
# max_bins=1023,
# feature_fraction=1,
# data_row_fraction=1,
# params="""{'tree_method':'approx','learning_rat':0.1,'subsample':0.7}""",
# params="""""",
# log_level="""信息""",
# plot_charts=True,
# use_gpu=False,
# m_name="""m5_xgboost"""
#)
#@module(position="-81,302", comment="""等权分配""")
m6 = M.score_to_position.v3(
input_1=m_lightgbm.predictions,
score_field="""score DESC""",
hold_count=5,##选择股票数量
position_expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
-- 在这里输入表达式, 每行一个表达式, 输出仓位字段必须命名为 position, 模块会进一步做归一化
-- 排序倒数: 1 / score_rank AS position
-- 对数下降: 1 / log2(score_rank + 1) AS position
-- TODO 拟合、最优化 ..
-- 等权重分配,股票购买额度计算,每个股票0.1
1 AS position
""",
total_position=1,#总仓位,100%
extract_data=True,
m_name="""m6"""
)
# @module(position="-106,405", comment="""交易,日线,设置初始化函数和K线处理函数,以及初始化资金、基准等""")
m7 = M.bigtrader.v25(
data=m6.data,
start_date="""""",
end_date="""""",
initialize=m7_initialize_bigquant_run,
before_trading_start=m7_before_trading_start_bigquant_run,
handle_tick=m7_handle_tick_bigquant_run,
handle_data=m7_handle_data_bigquant_run,
handle_trade=m7_handle_trade_bigquant_run,
handle_order=m7_handle_order_bigquant_run,
after_trading=m7_after_trading_bigquant_run,
capital_base=1000000,
frequency="""daily""",
product_type="""股票""",
rebalance_period_type="""交易日""",
rebalance_period_days="""5""",
rebalance_period_roll_forward=True,
backtest_engine_mode="""标准模式""",
before_start_days=20,
volume_limit=1,
order_price_field_buy="""open""",
order_price_field_sell="""close""",
benchmark="""沪深300指数""",
plot_charts=True,
debug=False,
backtest_only=False,
m_name="""m7""",
m_cached=False
)
# </aistudiograph>
现在报错
[2025-07-24 13:04:11] INFO: lightgbm.v4 命中缓存[2025-07-24 13:04:12] INFO: lightgbm.v4 运行完成 [0.507s].[2025-07-24 13:04:12] INFO: score_to_position.v3 开始运行 ..[2025-07-24 13:04:12] INFO: extract data ..[2025-07-24 13:04:12] ERROR: dai query failed: WITH _t_f5c0ca36b9d44df1b972570c6c6beeea AS (
SELECT
_30bcf3415d6b4ae1b930b3a7b15fd82c.* EXCLUDE(position),
ROW_NUMBER() OVER (PARTITION BY date ORDER BY score DESC, instrument) AS score_rank
FROM _30bcf3415d6b4ae1b930b3a7b15fd82c
QUALIFY
score_rank <= 5
),
_t_f46351c7064e4ec3a06395cb30da600f AS (
SELECT
_t_f5c0ca36b9d44df1b972570c6c6beeea.* EXCLUDE(position),
1 AS position
FROM _30bcf3415d6b4ae1b930b3a7b15fd82c
JOIN _t_f5c0ca36b9d44df1b972570c6c6beeea USING(date, instrument)
ORDER BY date, position
)
SELECT
* EXCLUDE(position),
c_sum(position) AS _position_sum,
position * 1 / CASE WHEN _position_sum = 0 THEN 1 ELSE _position_sum END AS position
FROM _t_f46351c7064e4ec3a06395cb30da600f
QUALIFY
position IS NOT NULL
ORDER BY date, position
您可以去社区论坛问答交流板块反馈咨询 去发帖>>
---------------------------------------------------------------------------
InvalidInputException Traceback (most recent call last)
Cell In[18], line 242
201 m_lightgbm = M.lightgbm.v4(
202 training_ds=m3.data,
203 predict_ds=m4.data,
(...)
216 m_name="""m_lightgbm"""
217 )
219 # @module(position="-404,-613", comment="""""", comment_collapsed=True)
220 #m5_xgboost = M.xgboost.v6(
221 # train_ds=m3.data,
(...)
240
241 #@module(position="-81,302", comment="""等权分配""")
--> 242 m6 = M.score_to_position.v3(
243 input_1=m_lightgbm.predictions,
244 score_field="""score DESC""",
245 hold_count=5,##选择股票数量
246 position_expr="""-- DAI SQL 算子/函数: https://bigquant.com/wiki/doc/dai-PLSbc1SbZX#h-%E5%87%BD%E6%95%B0
247 -- 在这里输入表达式, 每行一个表达式, 输出仓位字段必须命名为 position, 模块会进一步做归一化
248 -- 排序倒数: 1 / score_rank AS position
249 -- 对数下降: 1 / log2(score_rank + 1) AS position
250 -- TODO 拟合、最优化 ..
251 -- 等权重分配,股票购买额度计算,每个股票0.1
252 1 AS position
253 """,
254 total_position=1,#总仓位,100%
255 extract_data=True,
256 m_name="""m6"""
257 )
259 # @module(position="-106,405", comment="""交易,日线,设置初始化函数和K线处理函数,以及初始化资金、基准等""")
260 m7 = M.bigtrader.v25(
261 data=m6.data,
262 start_date="""""",
(...)
287 m_cached=False
288 )
File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/modules.py:28, in __call__(self, **kwargs)
File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:203, in module_invoke(name, version, kwargs)
File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:169, in _module_invoke(name, version, kwargs)
File /opt/pyenv/versions/3.11.8/lib/python3.11/site-packages/bigmodule/moduleinvoker.py:41, in _module_run(module, kwargs)
File dist/build/score_to_position/v3/__init__.py:236, in v3.run()
File dist/build/score_to_position/v3/__init__.py:201, in v3._create_ds_from_sql()
File dist/build/score_to_position/v3/__init__.py:198, in v3._create_ds_from_sql()
File /var/app/enabled/dai/_telemetry.py:128, in wrapper(*args, **kwargs)
File /var/app/enabled/dai/_functions.py:152, in df(self)
InvalidInputException: Invalid Input Error: Attempting to execute an unsuccessful or closed pending query result
Error: Binder Error: Referenced column "score" not found in FROM clause!
Candidate bindings: "instrument", "prediction"
LINE 1: ..."), row_number() OVER (PARTITION BY date ORDER BY score DESC, instrument) AS score_rank FROM _30bcf3415d6b4ae1b930b3...
^
StockRanker 不会报错,不知道什么问题
\