2.0 平台迁移:dai 数据问题
由csowen创建,最终由csowen 被浏览 2 用户
import dai
from biglearning.module2.common.data import Outputs
from biglearning.api import M
# Date range (both ends inclusive in the SQL below) of the daily bars to load.
start_date = "2010-01-01"
end_date = "2015-01-01"

# SQL selecting every column of the daily-bar table within the date range.
# The interpolated dates are the constants defined above, not external input.
sql_query = f"""
SELECT * FROM cn_stock_bar1d
WHERE date >= '{start_date}'
AND date <= '{end_date}'
"""

# Run the query and materialise the result as a pandas DataFrame.
df_all_stocks = dai.query(sql_query).df()

# Append "A" to every instrument code (e.g. "000001.SZ" -> "000001.SZA").
# Vectorised concatenation replaces the original per-row Python lambda.
df_all_stocks['instrument'] = df_all_stocks['instrument'].astype(str) + 'A'

if not df_all_stocks.empty:
    # Preview the first rows so the suffix rewrite can be checked by eye.
    print(df_all_stocks.head(5))
else:
    print("没有获取到数据。")

# Persist the DataFrame under a fixed id, then reopen it as a DataSource handle
# for later steps.
data_source_id = "all_train_stocks_data_source"
dai.DataSource.write_bdb(df_all_stocks, id=data_source_id)
data_source = dai.DataSource(data_source_id)
# Build the training labels with the auto labeler.
#
# `instruments` is given the list of instrument codes rather than the
# DataFrame-backed DataSource: the run log of the previous version shows the
# labeler forwarding `instruments` to `history_data(instruments, start_date,
# end_date, ...)`, which then failed with "ValueError: The truth value of a
# DataFrame is ambiguous" when it received the DataFrame.
# The date range is passed explicitly because a plain code list carries none.
# NOTE(review): confirm that this platform version accepts a plain list of
# codes together with explicit start/end dates.
label_instruments = sorted(df_all_stocks['instrument'].dropna().unique())

m2 = M.advanced_auto_labeler.v2(
    instruments=label_instruments,
    label_expr="""
##号开始的表示注释
# 0. 每行一个,顺序执行,从第二个开始,可以使用label字段
# 1. 可用数据字段见 https://bigquant.com/docs/data_history_data.html
# 添加benchmark_前缀,可使用对应的benchmark数据
# 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/big_expr.html>`_
# 计算收益:2日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
shift(close, -2) / shift(open, -1) #-3 ~-34%
# 极值处理:用1%和99%分位的值做clip
clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
# 将分数映射到分类,这里使用20个分类
all_wbins(label, 20)
# 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
where(shift(high, -1) == shift(low, -1), NaN, label)
""",
    start_date=start_date,
    end_date=end_date,
    benchmark='000300.SHA',
    drop_na_label=True,
    cast_label_int=True,
)
# Print the type of the returned module output object.
print(type(m2))
日志 38 条 ▼
date instrument name adjust_factor pre_close open \
0 2010-01-04 000001.SZ 深发展A 35.905533 875.017835 880.403665
1 2010-01-04 000002.SZ 万科A 110.804108 1197.792404 1202.224568
2 2010-01-04 000004.SZ *ST国农 4.063862 40.638618 NaN
3 2010-01-04 000005.SZ 世纪星源 9.267600 55.790952 55.698276
4 2010-01-04 000006.SZ 深振业A 11.023515 124.896422 124.896422
... ... ... ... ... ... ...
2783656 2014-06-10 300121.SZ 阳谷华泰 4.743807 41.318558 40.464672
2783657 2014-06-10 300122.SZ 智飞生物 2.056240 50.295632 50.377882
2783658 2014-06-10 300123.SZ 太阳鸟 3.346824 26.741123 26.607250
2783659 2014-06-10 300124.SZ 汇川技术 7.607486 194.599486 194.371261
2783660 2014-06-10 300125.SZ 易世达 2.029911 30.347170 30.306572
close high low volume deal_number \
0 851.320184 882.557997 850.243018 24192276 20836
1 1174.523541 1204.440650 1174.523541 96983253 68592
2 NaN NaN NaN 0 0
3 55.512924 56.068980 54.771516 22358222 12059
4 122.581484 125.116892 122.471249 6299805 4417
... ... ... ... ... ...
2783656 42.267319 42.646823 39.468473 19391717 8390
2783657 50.131133 50.624631 49.493699 695249 745
2783658 26.640718 26.841528 25.971354 2300279 2127
2783659 201.598373 202.359122 194.066962 4616482 5059
2783660 30.773451 30.793750 30.062983 1596815 1281
amount change_ratio turn upper_limit lower_limit
0 5.802495e+08 -0.027082 0.008273 962.627335 787.408335
1 1.034345e+09 -0.019426 0.010044 1317.460840 1078.123968
2 NaN 0.000000 0.000000 999999.990000 0.010000
3 1.334784e+08 -0.004983 0.024469 61.351512 50.230392
4 7.054856e+07 -0.018535 0.012770 137.352994 112.439850
... ... ... ... ... ...
2783656 1.674433e+08 0.022962 0.147244 45.445670 37.191446
2783657 1.698289e+07 -0.003271 0.002163 55.333421 45.257844
2783658 1.815866e+07 -0.003755 0.008792 29.418582 24.063664
2783659 1.206833e+08 0.035966 0.007595 214.074650 175.124322
2783660 2.401733e+07 0.014047 0.022029 33.392037 27.322603
[2783661 rows x 16 columns]
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
Cell In[16], line 201
198 dai.DataSource.write_bdb(df_all_stocks, id=data_source_id)
199 ##创建一个DataSource对象以便后续使用
200 #data_source = dai.DataSource(data_source_id)
--> 201 data_source = data.DataSource('cn_stock_bar1d')
202 m1=Outputs(data=data_source)
207 m2 = M.advanced_auto_labeler.v2(
208 instruments=m1.data,
209 label_expr="""
(...)
231 cast_label_int=True
232 )
NameError: name 'data' is not defined
日志 21 条 ▼
date instrument name adjust_factor pre_close open \
0 2010-01-04 000001.SZA 深发展A 35.905533 875.017835 880.403665
1 2010-01-04 000002.SZA 万科A 110.804108 1197.792404 1202.224568
2 2010-01-04 000004.SZA *ST国农 4.063862 40.638618 NaN
3 2010-01-04 000005.SZA 世纪星源 9.267600 55.790952 55.698276
4 2010-01-04 000006.SZA 深振业A 11.023515 124.896422 124.896422
close high low volume deal_number amount \
0 851.320184 882.557997 850.243018 24192276 20836 5.802495e+08
1 1174.523541 1204.440650 1174.523541 96983253 68592 1.034345e+09
2 NaN NaN NaN 0 0 NaN
3 55.512924 56.068980 54.771516 22358222 12059 1.334784e+08
4 122.581484 125.116892 122.471249 6299805 4417 7.054856e+07
change_ratio turn upper_limit lower_limit
0 -0.027082 0.008273 962.627335 787.408335
1 -0.019426 0.010044 1317.460840 1078.123968
2 0.000000 0.000000 999999.990000 0.010000
3 -0.004983 0.024469 61.351512 50.230392
4 -0.018535 0.012770 137.352994 112.439850
[2025-01-27 11:23:36.931298] INFO: moduleinvoker:671852176.py:27:<module> advanced_auto_labeler.v2 开始运行..
[2025-01-27 11:23:39.446849] ERROR: moduleinvoker:671852176.py:27:<module> module name: advanced_auto_labeler, module version: v2, trackeback: ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[15], line 27
24 data_source = dai.DataSource(data_source_id)
26 # 继续使用m1进行后续操作
---> 27 m2 = M.advanced_auto_labeler.v2(
28 instruments=data_source,
29 label_expr="""
30 ##号开始的表示注释
31 # 0. 每行一个,顺序执行,从第二个开始,可以使用label字段
32 # 1. 可用数据字段见 https://bigquant.com/docs/data_history_data.html
33 # 添加benchmark_前缀,可使用对应的benchmark数据
34 # 2. 可用操作符和函数见 `表达式引擎 `_
35 # 计算收益:2日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
36 shift(close, -2) / shift(open, -1) #-3 ~-34%
37
38 # 极值处理:用1%和99%分位的值做clip
39 clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
40
41 # 将分数映射到分类,这里使用20个分类
42 all_wbins(label, 20)
43
44 # 过滤掉一字涨停的情况 (设置label为NaN,在后续处理和训练中会忽略NaN的label)
45 where(shift(high, -1) == shift(low, -1), NaN, label)
46 """,
47 start_date='',
48 end_date='',
49 benchmark='000300.SHA',
50 drop_na_label=True,
51 cast_label_int=True
52 )
53 print(type(m2))
File module2/common/modulemanagerv2.py:88, in biglearning.module2.common.modulemanagerv2.BigQuantModuleVersion.__call__()
File module2/common/moduleinvoker.py:370, in biglearning.module2.common.moduleinvoker.module_invoke()
File module2/common/moduleinvoker.py:292, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
File module2/common/moduleinvoker.py:253, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
File module2/common/moduleinvoker.py:212, in biglearning.module2.common.moduleinvoker._module_run()
File module2/modules/advanced_auto_labeler/v2/__init__.py:123, in biglearning.module2.modules.advanced_auto_labeler.v2.__init__.BigQuantModule.run()
File module2/modules/advanced_auto_labeler/v2/__init__.py:101, in biglearning.module2.modules.advanced_auto_labeler.v2.__init__.BigQuantModule.__load_data()
File /var/app/enabled/bigdatasource/api/datareader.py:283, in history_data(self, instruments, start_date, end_date, fields, market)
File /var/app/enabled/bigdatasource/api/v6/bigdatasource.py:74, in read(self, instruments, start_date, end_date, fields, query, product_codes, **kwargs)
File /usr/local/python3/lib/python3.8/site-packages/pandas/core/generic.py:1441, in NDFrame.__nonzero__(self)
1439 @final
1440 def __nonzero__(self):
-> 1441 raise ValueError(
1442 f"The truth value of a {type(self).__name__} is ambiguous. "
1443 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
1444 )
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
\