复制链接
克隆策略
In [1]:
from bigdatasource.api import DataSource
from biglearning.api import M
from biglearning.api import tools as T
from biglearning.module2.common.data import Outputs
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

#并行任务
from joblib import Parallel, delayed
import copy
In [2]:
m1 = M.instruments.v2(
    start_date='2020-04-01',
    end_date='2023-04-28',
    market='CN_STOCK_A',
    instrument_list='',
    max_count=0
)

m2 = M.input_features.v1(
    features="""
# #号开始的表示注释,注释需单独一行
# 多个特征,每行一个,可以包含基础特征和衍生特征,特征须为本平台特征
换手=turn_0

成交量=volume_0

流通市值=market_cap_float_0

收盘价=close_0

开盘价=open_0

成交额=amount_0

最高价=high_0

最低价=low_0"""
)


m3 = M.general_feature_extractor.v7(
    instruments=m1.data,
    features=m2.data,
    start_date='',
    end_date='',
    before_start_days=500
)

m4 = M.derived_feature_extractor.v3(
    input_data=m3.data,
    features=m2.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False,
    user_functions={}
)

m5 = M.chinaa_stock_filter.v1(
    input_data=m4.data,
    index_constituent_cond=['全部'],
    board_cond=['上证主板', '深证主板', '创业板'],
    industry_cond=['全部'],
    st_cond=['正常'],
    delist_cond=['非退市'],
    output_left_data=False
)
[2023-04-30 18:49:23.942144] INFO moduleinvoker: instruments.v2 开始运行..
[2023-04-30 18:49:23.950115] INFO moduleinvoker: 命中缓存
[2023-04-30 18:49:23.951489] INFO moduleinvoker: instruments.v2 运行完成[0.009412s].
[2023-04-30 18:49:23.957863] INFO moduleinvoker: input_features.v1 开始运行..
[2023-04-30 18:49:23.965533] INFO moduleinvoker: 命中缓存
[2023-04-30 18:49:23.966842] INFO moduleinvoker: input_features.v1 运行完成[0.008992s].
[2023-04-30 18:49:24.002916] INFO moduleinvoker: general_feature_extractor.v7 开始运行..
[2023-04-30 18:49:24.009535] INFO moduleinvoker: 命中缓存
[2023-04-30 18:49:24.011056] INFO moduleinvoker: general_feature_extractor.v7 运行完成[0.00815s].
[2023-04-30 18:49:24.028634] INFO moduleinvoker: derived_feature_extractor.v3 开始运行..
[2023-04-30 18:49:24.035166] INFO moduleinvoker: 命中缓存
[2023-04-30 18:49:24.036585] INFO moduleinvoker: derived_feature_extractor.v3 运行完成[0.007957s].
[2023-04-30 18:49:24.049999] INFO moduleinvoker: chinaa_stock_filter.v1 开始运行..
[2023-04-30 18:49:24.056334] INFO moduleinvoker: 命中缓存
[2023-04-30 18:49:24.057725] INFO moduleinvoker: chinaa_stock_filter.v1 运行完成[0.007721s].
In [3]:
df = m5.data.read()
df
Out[3]:
amount_0 close_0 date high_0 instrument low_0 market_cap_float_0 open_0 turn_0 volume_0 换手 成交量 流通市值 收盘价 开盘价 成交额 最高价 最低价
0 1.466899e+09 1172.140625 2018-11-19 1175.381592 000001.SZA 1141.891846 1.862972e+11 1141.891846 0.792584 136088709 0.792584 136088709 1.862972e+11 1172.140625 1141.891846 1.466899e+09 1175.381592 1141.891846
1 1.099655e+09 1141.891846 2018-11-20 1166.739136 000001.SZA 1136.490356 1.814895e+11 1161.337524 0.602133 103387753 0.602133 103387753 1.814895e+11 1141.891846 1161.337524 1.099655e+09 1166.739136 1136.490356
2 7.056384e+08 1146.213135 2018-11-21 1152.695068 000001.SZA 1132.169067 1.821763e+11 1134.329712 0.387972 66615732 0.387972 66615732 1.821763e+11 1146.213135 1134.329712 7.056384e+08 1152.695068 1132.169067
3 5.271033e+08 1139.731201 2018-11-22 1152.695068 000001.SZA 1132.169067 1.811461e+11 1152.695068 0.291357 50026785 0.291357 50026785 1.811461e+11 1139.731201 1152.695068 5.271033e+08 1152.695068 1132.169067
4 7.306677e+08 1114.884033 2018-11-23 1142.972168 000001.SZA 1113.803711 1.771969e+11 1137.570679 0.407891 70035815 0.407891 70035815 1.771969e+11 1114.884033 1137.570679 7.306677e+08 1142.972168 1113.803711
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4513167 5.025992e+07 11.608728 2023-04-24 11.815106 605599.SHA 11.505540 6.600125e+09 11.629367 0.758342 4449022 0.758342 4449022 6.600125e+09 11.608728 11.629367 5.025992e+07 11.815106 11.505540
4513168 7.739483e+07 11.773830 2023-04-25 11.928614 605599.SHA 11.515859 6.693994e+09 11.711918 1.159832 6804480 1.159832 6804480 6.693994e+09 11.773830 11.711918 7.739483e+07 11.928614 11.515859
4513169 2.930293e+08 12.898587 2023-04-26 12.950182 605599.SHA 12.258818 7.333472e+09 12.382644 4.049788 23759210 4.049788 23759210 7.333472e+09 12.898587 12.382644 2.930293e+08 12.950182 12.258818
4513170 1.991076e+08 12.960501 2023-04-27 13.146240 605599.SHA 12.785080 7.368673e+09 12.929544 2.707732 15885664 2.707732 15885664 7.368673e+09 12.960501 12.929544 1.991076e+08 13.146240 12.785080
4513171 1.279716e+08 12.723166 2023-04-28 13.053370 605599.SHA 12.434238 7.233737e+09 13.022414 1.775142 10414364 1.775142 10414364 7.233737e+09 12.723166 13.022414 1.279716e+08 13.053370 12.434238

4104303 rows × 18 columns

In [33]:
def calc_chips(df,instrument):
    import warnings
    warnings.filterwarnings('ignore')

    df_ = df.copy()
    df_ = df_[df_['instrument'] == instrument]
    df_.reset_index(inplace=True,drop=True)

    #计算1日的获利盘比例
    def calc_one_day(data,n):
        data.sort_values(by='date',inplace=True)
        data.reset_index(inplace=True , drop=True)
        
        #获得换手率array
        turn_array = np.array(data['换手'])/100  #除以100 成为小数格式
        diff_array = 1 - turn_array              # (1-当日换手) 数列
        cp_array = diff_array.cumprod()          # 等差数列连乘
        mul = cp_array[::-1]                     # 等差数列翻转获得乘子
        mul = np.roll(mul,-1)                    # 对乘子进行shift 并将最后的值赋值为1 (即观察日换手乘数为1)
        mul[-1] = 1
        mul_turn = turn_array * mul

        #计算百分之多少的筹码
        turn_cumsum = mul_turn[::-1]               #对调整后的换手 倒序后累加
        turn_cumsum = turn_cumsum.cumsum()         #累加
        target = np.ones(n) * 1.5  #这里输入计算百分之多少的筹码     #根据想计算的筹码总量百分比设定数值

        pos = np.argmin(np.abs(turn_cumsum - target))   #找到最接近百分比值的位置,做切片操作
        mul_turn = mul_turn[-pos:]
        tmp = data.iloc[-pos:]
        tmp.reset_index(drop=True,inplace=True)

        tmp['adj_turn'] = pd.Series(mul_turn)             #adj_turn = 历史换手率等价于今天的换手率
        tmp['adj_amo'] = tmp['adj_turn'] * tmp['成交额']  #adj_amo = 根据换手率调整成交额

        #计算获利盘比例
        tmp.sort_values(by='收盘价',ascending=True,inplace=True) #按收盘价排序 
        tmp['cum_amo'] = tmp['adj_amo'].cumsum()                 #调整后的成交额累加
        num = tmp['cum_amo'].iloc[-1]                            #获取成交额累加结果
        tmp['winner_ratio'] = tmp['cum_amo'] /num                #计算获利盘占比
        tmp.sort_values(by='date',inplace=True)
        return tmp.iloc[[-1],:]                             #输出最后一行
    
    
    idx_list = df_.index.tolist()
    len_ = len(idx_list)
    pos = 0
    n = 400
    if len_ < n:
        n = len_

    lst = []
    while pos <= len_:
        try:

            ep = idx_list[pos]

            sp = ep - n + 1
            pos += 1

            if sp >= 0:
                data_tmp = df_.loc[sp:ep]
                outputs = calc_one_day(data_tmp,n)
                lst.append(outputs)
                pos += 1
        except:
            pos +=1

    data = pd.concat(lst)

    #用于研究未来收益期望使用,不需要可以注释掉
    data.reset_index(inplace=True,drop=True)
    data['future_return_5'] = data['收盘价'].shift(-5)/data['收盘价']
    data['future_return_1'] = data['收盘价'].shift(-1)/data['收盘价']
    data.dropna(inplace=True)

    return data
In [34]:
lst = df.instrument.unique().tolist()

results = Parallel(n_jobs=32)(delayed(calc_chips)(df,ins) for ins in lst)
In [38]:
df_ = pd.concat(results)
In [39]:
df_.sort_values(by='date',inplace=True)
df_.reset_index(inplace=True,drop=True)
df_
Out[39]:
amount_0 close_0 date high_0 instrument low_0 market_cap_float_0 open_0 turn_0 volume_0 ... 开盘价 成交额 最高价 最低价 adj_turn adj_amo cum_amo winner_ratio future_return_5 future_return_1
0 2.871415e+09 1653.515381 2020-07-13 1674.614624 000001.SZA 1610.206299 2.889517e+11 1632.416138 0.998240 193716012 ... 1632.416138 2.871415e+09 1674.614624 1610.206299 0.009982 2.866362e+07 1.133279e+09 0.602982 0.889187 0.958361
1 6.967145e+07 41.735756 2020-07-13 41.926765 002543.SZA 40.637447 5.513536e+09 40.876209 1.274780 8041813 ... 40.876209 6.967145e+07 41.926765 40.637447 0.012748 8.881576e+05 1.293677e+07 0.430227 0.927918 0.979405
2 1.453664e+08 40.977814 2020-07-13 41.510349 603515.SHA 40.568172 2.262030e+10 40.581829 0.643260 4848631 ... 40.581829 1.453664e+08 41.510349 40.568172 0.006433 9.350845e+05 2.525545e+07 0.730507 1.020993 1.015328
3 4.211733e+08 94.161919 2020-07-13 94.256844 002541.SZA 86.125252 1.099705e+10 86.125252 3.938262 14552851 ... 86.125252 4.211733e+08 94.256844 86.125252 0.039383 1.658691e+07 3.744348e+08 1.000000 1.228159 1.077957
4 6.856912e+07 39.469479 2020-07-13 39.787781 002540.SZA 37.957542 4.439177e+09 38.037117 1.560603 13967320 ... 38.037117 6.856912e+07 39.787781 37.957542 0.015606 1.070091e+06 5.846393e+07 0.476492 1.030242 0.965726
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1175484 2.943749e+07 6.316783 2023-04-14 6.349400 601825.SHA 6.316783 2.685245e+10 6.349400 0.109467 5059307 ... 6.349400 2.943749e+07 6.349400 6.316783 0.001095 3.222434e+04 3.937714e+05 0.097570 1.017212 1.010327
1175485 1.788348e+08 12.890762 2023-04-14 13.316669 000965.SZA 12.862369 5.038632e+09 13.089519 3.506258 38913531 ... 13.089519 1.788348e+08 13.316669 12.862369 0.035063 6.270409e+06 2.379574e+07 0.090714 0.887665 0.980176
1175486 3.116128e+08 396.620483 2023-04-14 403.538300 600426.SHA 396.013672 6.901624e+10 396.741852 0.448944 9481153 ... 396.741852 3.116128e+08 403.538300 396.013672 0.004489 1.398966e+06 1.208997e+08 0.443368 0.975520 1.043758
1175487 9.630090e+07 22.245485 2023-04-14 22.398151 601369.SHA 22.027393 1.696647e+10 22.289104 0.568537 9456933 ... 22.289104 9.630090e+07 22.398151 22.027393 0.005685 5.475067e+05 1.444103e+07 0.032620 0.926471 1.006863
1175488 3.230988e+07 11.894893 2023-04-14 11.958389 603012.SHA 11.789066 3.577467e+09 11.873728 0.905486 5763964 ... 11.873728 3.230988e+07 11.958389 11.789066 0.009055 2.925616e+05 4.730272e+06 0.066028 1.023132 1.001779

1175489 rows × 24 columns

In [40]:
def calc_mr(df):
    mean = df['future_return_1'].mean()
    groups = df.groups.unique()[0]

    df_tmp = pd.DataFrame({
                    'label':groups,
                    'mr':mean},

                    columns=[ 
                            'label',
                            'mr',
],
                index=pd.RangeIndex(start=0, stop=1))
    return df_tmp

df_['groups'] = pd.cut(df_['winner_ratio'],bins=7,labels=False)
df_plot = df_.groupby(['groups']).apply(calc_mr)
df_plot.reset_index(drop=True,inplace=True)
In [41]:
df_plot
Out[41]:
label mr
0 0 1.001719
1 1 1.001153
2 2 1.000856
3 3 1.000548
4 4 0.999764
5 5 0.999412
6 6 0.998695
In [42]:
import plotly.graph_objects as go

# 创建柱状图
fig = go.Figure(
    data=[go.Bar(x=df_plot.label, y=df_plot.mr-1, text=df_plot.mr-1, textposition='auto')],
    layout_title_text='获利盘分层未来5日收益期望'
)

fig.show()
In [11]:
def calc_ic(df):
    mean = df['future_return_5'].mean()
    groups = df.groups.unique()[0]
    date = df.date.unique()[0]
    winner = df['winner_ratio'].mean()

    df_tmp = pd.DataFrame({'date':date,
                    'label':groups,
                    'mr':mean,
                    'winner':winner},

                    columns=[ 'date',
                            'label',
                            'mr',
                            'winner'
],
                index=pd.RangeIndex(start=0, stop=1))
    return df_tmp

df_['groups'] = pd.cut(df_['winner_ratio'],bins=7,labels=False)
df_plot_ic = df_.groupby(['groups','date']).apply(calc_ic)
df_plot_ic.reset_index(drop=True,inplace=True)
In [ ]:
from plotly.subplots import make_subplots

plot = pd.DataFrame({'label':None,
                    'corr':None},
                    columns=[ 
                            'label',
                            'corr'],
                index=pd.RangeIndex(start=0, stop=1))

for num in range(7):
    cut = df_plot_ic[df_plot_ic['label']==num]
    corr = cut['winner'].corr(cut['mr'])
    
    plot.loc[num,'corr'] =corr
    plot.loc[num,'label']= num
In [ ]:
fig = go.Figure(
    data=[go.Bar(x=plot['label'], y=plot['corr'])],
    layout_title_text='不同层,获利盘与未来收益率相关性'
)

fig.show()
In [37]:
df_
Out[37]:
amount_0 close_0 date high_0 instrument low_0 market_cap_float_0 open_0 turn_0 volume_0 ... 开盘价 成交额 最高价 最低价 adj_turn adj_amo cum_amo winner_ratio future_return_5 groups
0 2.871415e+09 1653.515381 2020-07-13 1674.614624 000001.SZA 1610.206299 2.889517e+11 1632.416138 0.998240 193716012 ... 1632.416138 2.871415e+09 1674.614624 1610.206299 0.009982 2.866362e+07 1.133279e+09 0.602982 0.889187 4
1 6.967145e+07 41.735756 2020-07-13 41.926765 002543.SZA 40.637447 5.513536e+09 40.876209 1.274780 8041813 ... 40.876209 6.967145e+07 41.926765 40.637447 0.012748 8.881576e+05 1.293677e+07 0.430227 0.927918 3
2 1.453664e+08 40.977814 2020-07-13 41.510349 603515.SHA 40.568172 2.262030e+10 40.581829 0.643260 4848631 ... 40.581829 1.453664e+08 41.510349 40.568172 0.006433 9.350845e+05 2.525545e+07 0.730507 1.020993 5
3 4.211733e+08 94.161919 2020-07-13 94.256844 002541.SZA 86.125252 1.099705e+10 86.125252 3.938262 14552851 ... 86.125252 4.211733e+08 94.256844 86.125252 0.039383 1.658691e+07 3.744348e+08 1.000000 1.228159 6
4 6.856912e+07 39.469479 2020-07-13 39.787781 002540.SZA 37.957542 4.439177e+09 38.037117 1.560603 13967320 ... 38.037117 6.856912e+07 39.787781 37.957542 0.015606 1.070091e+06 5.846393e+07 0.476492 1.030242 3
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1175484 2.943749e+07 6.316783 2023-04-14 6.349400 601825.SHA 6.316783 2.685245e+10 6.349400 0.109467 5059307 ... 6.349400 2.943749e+07 6.349400 6.316783 0.001095 3.222434e+04 3.937714e+05 0.097570 1.017212 0
1175485 1.788348e+08 12.890762 2023-04-14 13.316669 000965.SZA 12.862369 5.038632e+09 13.089519 3.506258 38913531 ... 13.089519 1.788348e+08 13.316669 12.862369 0.035063 6.270409e+06 2.379574e+07 0.090714 0.887665 0
1175486 3.116128e+08 396.620483 2023-04-14 403.538300 600426.SHA 396.013672 6.901624e+10 396.741852 0.448944 9481153 ... 396.741852 3.116128e+08 403.538300 396.013672 0.004489 1.398966e+06 1.208997e+08 0.443368 0.975520 3
1175487 9.630090e+07 22.245485 2023-04-14 22.398151 601369.SHA 22.027393 1.696647e+10 22.289104 0.568537 9456933 ... 22.289104 9.630090e+07 22.398151 22.027393 0.005685 5.475067e+05 1.444103e+07 0.032620 0.926471 0
1175488 3.230988e+07 11.894893 2023-04-14 11.958389 603012.SHA 11.789066 3.577467e+09 11.873728 0.905486 5763964 ... 11.873728 3.230988e+07 11.958389 11.789066 0.009055 2.925616e+05 4.730272e+06 0.066028 1.023132 0

1175489 rows × 24 columns

In [43]:
def calc_return(df):
    df['return'] = df['future_return_1'].mean()
    return df

plot_ic = df_.groupby(['date','groups']).apply(calc_return)
In [44]:
test = plot_ic.drop_duplicates(subset=['date','groups'])
In [45]:
def cal_cum_prod(df):
    df['equity'] = df['return'].cumprod()
    return df 

test = test.groupby('groups').apply(cal_cum_prod)
In [46]:
test[test['groups']==0]['equity']
Out[46]:
106        0.968811
2894       0.907427
2952       0.874508
5832       0.888125
5941       0.923778
             ...   
1164577    1.560249
1167691    1.565031
1168490    1.564147
1171600    1.576359
1172392    1.575074
Name: equity, Length: 671, dtype: float64
In [48]:
import plotly.graph_objects as go

# 创建一个折线图对象
fig = go.Figure()

# 添加折线到图表
fig.add_trace(go.Scatter(x=test.date.unique(), y=test[test['groups']==0]['equity'], mode='lines', name='group_0'))
fig.add_trace(go.Scatter(x=test.date.unique(), y=test[test['groups']==1]['equity'], mode='lines', name='group_1'))
fig.add_trace(go.Scatter(x=test.date.unique(), y=test[test['groups']==2]['equity'], mode='lines', name='group_2'))
fig.add_trace(go.Scatter(x=test.date.unique(), y=test[test['groups']==3]['equity'], mode='lines', name='group_3'))
fig.add_trace(go.Scatter(x=test.date.unique(), y=test[test['groups']==4]['equity'], mode='lines', name='group_4'))
fig.add_trace(go.Scatter(x=test.date.unique(), y=test[test['groups']==5]['equity'], mode='lines', name='group_5'))
fig.add_trace(go.Scatter(x=test.date.unique(), y=test[test['groups']==6]['equity'], mode='lines', name='group_6'))


# 设置图表标题和坐标轴标签
fig.update_layout(title='分层测试', xaxis_title='X Axis Label', yaxis_title='Y Axis Label')

# 显示图表
fig.show()