【平台使用】2.0平台迁移dai 数据问题

由csowen创建，最终由small_q更新于2025-02-11 10:23 被浏览 34 用户
import dai
from biglearning.module2.common.data import Outputs
from biglearning.api import M
# Date range shared by the dai query and the labeler call below.
start_date = "2010-01-01"
end_date = "2015-01-01"

# SQL for the daily bars of every stock inside the range (both bounds inclusive).
sql_query = f"""
SELECT * FROM cn_stock_bar1d
WHERE date >= '{start_date}' 
AND date <= '{end_date}'
"""

# Run the query and materialize the result as a pandas DataFrame.
df_all_stocks = dai.query(sql_query).df()

# The query returns codes such as "000001.SZ"; append "A" so they carry the
# same suffix style as the benchmark code ("000300.SHA") passed to the labeler.
df_all_stocks['instrument'] = df_all_stocks['instrument'].apply(lambda code: code + 'A')

if not df_all_stocks.empty:
    # Quick sanity check of what came back.
    print(df_all_stocks.head(5))
else:
    print("没有获取到数据。")

# Persist the frame as a DataSource so later steps can reuse it by id.
data_source_id = "all_train_stocks_data_source"
dai.DataSource.write_bdb(df_all_stocks, id=data_source_id)
data_source = dai.DataSource(data_source_id)

# The labeler's `instruments` argument must be a list of instrument codes
# (or the output of an instruments module), NOT a DataSource that wraps a
# DataFrame: the module forwards it to history_data(), which truth-tests it
# and fails with "The truth value of a DataFrame is ambiguous".
instrument_list = sorted(df_all_stocks['instrument'].unique().tolist())

# Label the data. Because `instruments` is a plain list it carries no date
# range of its own, so start_date / end_date are passed explicitly.
# NOTE(review): the traceback shows the labeler loading prices itself through
# history_data(); presumably it does not read the dai frame written above —
# confirm that the legacy data source is available on the 2.0 platform.
m2 = M.advanced_auto_labeler.v2(
    instruments=instrument_list,
    label_expr="""
    ##号开始的表示注释
# 0. 每行一个，顺序执行，从第二个开始，可以使用label字段
# 1. 可用数据字段见 https://bigquant.com/docs/data_history_data.html
#   添加benchmark_前缀，可使用对应的benchmark数据
# 2. 可用操作符和函数见 `表达式引擎 <https://bigquant.com/docs/big_expr.html>`_
# 计算收益：2日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
shift(close, -2) / shift(open, -1) #-3 ~-34%

# 极值处理：用1%和99%分位的值做clip
clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))

# 将分数映射到分类，这里使用20个分类
all_wbins(label, 20)

# 过滤掉一字涨停的情况 (设置label为NaN，在后续处理和训练中会忽略NaN的label)
where(shift(high, -1) == shift(low, -1), NaN, label)
""",
    start_date=start_date,
    end_date=end_date,
    benchmark='000300.SHA',
    drop_na_label=True,
    cast_label_int=True
)
print(type(m2))

日志 38 条 ▼
              date instrument   name  adjust_factor    pre_close         open  \
0       2010-01-04  000001.SZ   深发展A      35.905533   875.017835   880.403665   
1       2010-01-04  000002.SZ    万科A     110.804108  1197.792404  1202.224568   
2       2010-01-04  000004.SZ  *ST国农       4.063862    40.638618          NaN   
3       2010-01-04  000005.SZ   世纪星源       9.267600    55.790952    55.698276   
4       2010-01-04  000006.SZ   深振业A      11.023515   124.896422   124.896422   
...            ...        ...    ...            ...          ...          ...   
2783656 2014-06-10  300121.SZ   阳谷华泰       4.743807    41.318558    40.464672   
2783657 2014-06-10  300122.SZ   智飞生物       2.056240    50.295632    50.377882   
2783658 2014-06-10  300123.SZ    太阳鸟       3.346824    26.741123    26.607250   
2783659 2014-06-10  300124.SZ   汇川技术       7.607486   194.599486   194.371261   
2783660 2014-06-10  300125.SZ    易世达       2.029911    30.347170    30.306572   
               close         high          low    volume  deal_number  \
0         851.320184   882.557997   850.243018  24192276        20836   
1        1174.523541  1204.440650  1174.523541  96983253        68592   
2                NaN          NaN          NaN         0            0   
3          55.512924    56.068980    54.771516  22358222        12059   
4         122.581484   125.116892   122.471249   6299805         4417   
...              ...          ...          ...       ...          ...   
2783656    42.267319    42.646823    39.468473  19391717         8390   
2783657    50.131133    50.624631    49.493699    695249          745   
2783658    26.640718    26.841528    25.971354   2300279         2127   
2783659   201.598373   202.359122   194.066962   4616482         5059   
2783660    30.773451    30.793750    30.062983   1596815         1281   
               amount  change_ratio      turn    upper_limit  lower_limit  
0        5.802495e+08     -0.027082  0.008273     962.627335   787.408335  
1        1.034345e+09     -0.019426  0.010044    1317.460840  1078.123968  
2                 NaN      0.000000  0.000000  999999.990000     0.010000  
3        1.334784e+08     -0.004983  0.024469      61.351512    50.230392  
4        7.054856e+07     -0.018535  0.012770     137.352994   112.439850  
...               ...           ...       ...            ...          ...  
2783656  1.674433e+08      0.022962  0.147244      45.445670    37.191446  
2783657  1.698289e+07     -0.003271  0.002163      55.333421    45.257844  
2783658  1.815866e+07     -0.003755  0.008792      29.418582    24.063664  
2783659  1.206833e+08      0.035966  0.007595     214.074650   175.124322  
2783660  2.401733e+07      0.014047  0.022029      33.392037    27.322603  
[2783661 rows x 16 columns]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[16], line 201
    198 dai.DataSource.write_bdb(df_all_stocks, id=data_source_id)
    199 ##创建一个DataSource对象以便后续使用
    200 #data_source = dai.DataSource(data_source_id)
--> 201 data_source = data.DataSource('cn_stock_bar1d')
    202 m1=Outputs(data=data_source)
    207 m2 = M.advanced_auto_labeler.v2(
    208     instruments=m1.data,
    209     label_expr="""
   (...)
    231     cast_label_int=True
    232 )
NameError: name 'data' is not defined
日志 21 条 ▼
        date  instrument   name  adjust_factor    pre_close         open  \
0 2010-01-04  000001.SZA   深发展A      35.905533   875.017835   880.403665   
1 2010-01-04  000002.SZA    万科A     110.804108  1197.792404  1202.224568   
2 2010-01-04  000004.SZA  *ST国农       4.063862    40.638618          NaN   
3 2010-01-04  000005.SZA   世纪星源       9.267600    55.790952    55.698276   
4 2010-01-04  000006.SZA   深振业A      11.023515   124.896422   124.896422   
         close         high          low    volume  deal_number        amount  \
0   851.320184   882.557997   850.243018  24192276        20836  5.802495e+08   
1  1174.523541  1204.440650  1174.523541  96983253        68592  1.034345e+09   
2          NaN          NaN          NaN         0            0           NaN   
3    55.512924    56.068980    54.771516  22358222        12059  1.334784e+08   
4   122.581484   125.116892   122.471249   6299805         4417  7.054856e+07   
   change_ratio      turn    upper_limit  lower_limit  
0     -0.027082  0.008273     962.627335   787.408335  
1     -0.019426  0.010044    1317.460840  1078.123968  
2      0.000000  0.000000  999999.990000     0.010000  
3     -0.004983  0.024469      61.351512    50.230392  
4     -0.018535  0.012770     137.352994   112.439850  
[2025-01-27 11:23:36.931298] INFO: moduleinvoker:671852176.py:27:<module> advanced_auto_labeler.v2 开始运行..
[2025-01-27 11:23:39.446849] ERROR: moduleinvoker:671852176.py:27:<module> module name: advanced_auto_labeler, module version: v2, trackeback: ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[15], line 27
     24 data_source = dai.DataSource(data_source_id)
     26 # 继续使用m1进行后续操作
---> 27 m2 = M.advanced_auto_labeler.v2(
     28     instruments=data_source,
     29     label_expr="""
     30     ##号开始的表示注释
     31 # 0. 每行一个，顺序执行，从第二个开始，可以使用label字段
     32 # 1. 可用数据字段见 https://bigquant.com/docs/data_history_data.html
     33 #   添加benchmark_前缀，可使用对应的benchmark数据
     34 # 2. 可用操作符和函数见 `表达式引擎 `_
     35 # 计算收益：2日收盘价(作为卖出价格)除以明日开盘价(作为买入价格)
     36 shift(close, -2) / shift(open, -1) #-3 ~-34%
     37 
     38 # 极值处理：用1%和99%分位的值做clip
     39 clip(label, all_quantile(label, 0.01), all_quantile(label, 0.99))
     40 
     41 # 将分数映射到分类，这里使用20个分类
     42 all_wbins(label, 20)
     43 
     44 # 过滤掉一字涨停的情况 (设置label为NaN，在后续处理和训练中会忽略NaN的label)
     45 where(shift(high, -1) == shift(low, -1), NaN, label)
     46 """,
     47     start_date='',
     48     end_date='',
     49     benchmark='000300.SHA',
     50     drop_na_label=True,
     51     cast_label_int=True
     52 )
     53 print(type(m2))
File module2/common/modulemanagerv2.py:88, in biglearning.module2.common.modulemanagerv2.BigQuantModuleVersion.__call__()
File module2/common/moduleinvoker.py:370, in biglearning.module2.common.moduleinvoker.module_invoke()
File module2/common/moduleinvoker.py:292, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
File module2/common/moduleinvoker.py:253, in biglearning.module2.common.moduleinvoker._invoke_with_cache()
File module2/common/moduleinvoker.py:212, in biglearning.module2.common.moduleinvoker._module_run()
File module2/modules/advanced_auto_labeler/v2/__init__.py:123, in biglearning.module2.modules.advanced_auto_labeler.v2.__init__.BigQuantModule.run()
File module2/modules/advanced_auto_labeler/v2/__init__.py:101, in biglearning.module2.modules.advanced_auto_labeler.v2.__init__.BigQuantModule.__load_data()
File /var/app/enabled/bigdatasource/api/datareader.py:283, in history_data(self, instruments, start_date, end_date, fields, market)
File /var/app/enabled/bigdatasource/api/v6/bigdatasource.py:74, in read(self, instruments, start_date, end_date, fields, query, product_codes, **kwargs)
File /usr/local/python3/lib/python3.8/site-packages/pandas/core/generic.py:1441, in NDFrame.__nonzero__(self)
   1439 @final
   1440 def __nonzero__(self):
-> 1441     raise ValueError(
   1442         f"The truth value of a {type(self).__name__} is ambiguous. "
   1443         "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
   1444     )
ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
【平台使用】2.0平台迁移dai 数据问题

标签