In [1]:
import dai
import pandas as pd

data_id = 'test_20231221_1'

日成交量博弈-收益率

In [2]:
# 将每分钟成交量按照过去5分钟的收益率从小到大进行排序,获得排序后的1分钟成交量。
# 累加求和,得到正序成交量之和
# 2)按照同样的方式,将每分钟成交量按照过去5分钟的收益率从大到小排序后再累加求和,得到倒序成交量之和

# 这里存在一些问题:收益率相同的话,成交量应该怎样排序?这里先用和收益率相同的排序,考虑要不要做成都按成交量正序排序
# 成交量决定未来的收益率?还是过去的收益率决定当下的成交量?这里应该是收益率决定成交量的思想,考虑要不要反过来做


def calc_alpha(instrument,date):
    df = dai.query(f'''
    select date, volume, close / M_LAG(close, 5) as return_5
    from cn_stock_bar1m 
    where instrument='{instrument}' 
    and date>'{date} 09:30:00'
    and date<'{date} 14:58:00'
    order by date asc
    ''').df().dropna()[['volume','date','return_5']]

    se_1 = df.sort_values(by=['return_5','date'], ascending=(True,True)).reset_index(drop=True)['volume'].cumsum()
    se_2 = df.sort_values(by=['return_5','date'], ascending=(False,True)).reset_index(drop=True)['volume'].cumsum()

    # 3)将上述正序成交量减去倒序成交量之后再累加求和,即可得到日频因子“日成交量博弈-收益率”因子。

    se_3 = []
    for i in range(0,len(se_1)):
        se_3.append(se_1.iloc[i] - se_2.iloc[i])

    se_3 = pd.Series(se_3)
    return se_3.sum()
In [3]:
def save(data):
    import dai
    try:
        data[dai.DEFAULT_PARTITION_FIELD] = data["date"].apply(lambda x: f'{x.year}')
        dai.DataSource.write_bdb(
            data=data,
            id=data_id,
            unique_together=["date","instrument"],
            indexes=["date","instrument"],
        )
    except Exception as e:
        print('err: ', e)
In [4]:
marker_all_trading_days = '2023-12-14'
all_trading_days = dai.query(f"""select * from all_trading_days where date<='{marker_all_trading_days}' order by date desc limit 100""").df()
all_trading_days.iloc[99]['date']
Out[4]:
Timestamp('2023-07-20 00:00:00')
In [5]:
# cn_stock_instruments = dai.query(f"""select instrument,name from cn_stock_index_component where date='{marker_all_trading_days}' group by instrument,name """).df()
# cn_stock_instruments = dai.query(f"""select member_code from cn_stock_index_component where date='{marker_all_trading_days}' and instrument='000852.SH' and member_code not like '30%'""").df()
# pd.set_option('display.max_rows',None)
# cn_stock_instruments
In [6]:
for date in all_trading_days['date']:
    _date = date.strftime('%Y-%m-%d')
    cn_stock_instruments = dai.query(f"""select member_code from cn_stock_index_component where date='{_date}' and instrument='000852.SH' and member_code not like '30%'""").df()
    for ins in cn_stock_instruments['member_code']:
        alpha = calc_alpha(ins,_date)
        save(pd.DataFrame({"date":[date],"instrument":[ins],"alpha_1":[float(alpha)]}))
    print(date)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
KeyboardInterrupt: 

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
Cell In[6], line 5
      3 cn_stock_instruments = dai.query(f"""select member_code from cn_stock_index_component where date='{_date}' and instrument='000852.SH' and member_code not like '30%'""").df()
      4 for ins in cn_stock_instruments['member_code']:
----> 5     alpha = calc_alpha(ins,_date)
      6     save(pd.DataFrame({"date":[date],"instrument":[ins],"alpha_1":[float(alpha)]}))
      7 print(date)

Cell In[2], line 10, in calc_alpha(instrument, date)
      9 def calc_alpha(instrument,date):
---> 10     df = dai.query(f'''
     11     select date, volume, close / M_LAG(close, 5) as return_5
     12     from cn_stock_bar1m 
     13     where instrument='{instrument}' 
     14     and date>'{date} 09:30:00'
     15     and date<'{date} 14:58:00'
     16     order by date asc
     17     ''').df().dropna()[['volume','date','return_5']]
     19     se_1 = df.sort_values(by=['return_5','date'], ascending=(True,True)).reset_index(drop=True)['volume'].cumsum()
     20     se_2 = df.sort_values(by=['return_5','date'], ascending=(False,True)).reset_index(drop=True)['volume'].cumsum()

File /var/app/enabled/dai/_telemetry.py:85, in wrapper(*args, **kwargs)

File /var/app/enabled/dai/_functions.py:61, in df(self)

RuntimeError: Query interrupted