# This code was generated automatically by the visual strategy environment, 2021-01-07 18:07
# This code cell can only be edited in visual mode. You can also copy the code into a new code cell or strategy and modify it there.
# Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
def m7_run_bigquant_run(input_1, input_2, input_3):
    # Read the instrument list and date range once instead of re-reading the pickle three times
    meta = input_1.read_pickle()
    ins = meta['instruments']
    start_date = meta['start_date']
    end_date = meta['end_date']
    # Fetch the Shenwan level-1 industry code for every instrument and date
    industry_df = D.history_data(ins, start_date=start_date, end_date=end_date, fields=['industry_sw_level1'])
    processed_industry_df = industry_df.pivot(index='date', columns='instrument', values='industry_sw_level1')\
        .dropna(how='all')\
        .stack()\
        .apply(lambda x: 'SW' + str(int(x)) + '.SHA')\
        .reset_index()\
        .rename(columns={0: 'industry_code'})
    # Filter out the anomalous zero codes; strictly speaking they should be repaired rather than simply dropped
    processed_industry_df = processed_industry_df[processed_industry_df['industry_code'].apply(lambda x: len(x) == 12)]
    data_1 = DataSource.write_df(processed_industry_df)
    return Outputs(data_1=data_1, data_2=None, data_3=None)
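# For reference: industry_sw_level1 comes back as a numeric code, so a value like
# 110000.0 is mapped by the lambda above to the instrument-style code 'SW110000.SHA'.
# Any well-formed code is therefore exactly 12 characters ('SW' + 6 digits + '.SHA'),
# which is what the length filter in m7_run_bigquant_run relies on.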
# Post-processing function (optional). Its input is the main function's output; you can transform the data here or return a friendlier outputs format. This function's output is not cached.
def m7_post_run_bigquant_run(outputs):
return outputs
# Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
def m4_run_bigquant_run(input_1, input_2, input_3, topN):
    # Build the daily universe: the topN instruments by trading amount on each date
    amount_df = input_1.read_df()
    universe_dic = amount_df.groupby('date').apply(lambda df: df.sort_values('amount_0', ascending=False)[:topN].instrument.tolist()).to_dict()
    return Outputs(data_1=DataSource().write_pickle(universe_dic))
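# For reference, the resulting universe_dic maps each date to a plain list of the
# topN instrument codes ranked by amount_0, roughly (the codes shown are hypothetical):
#   {Timestamp('2010-01-04'): ['600000.SHA', '000001.SZA', ...], ...}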
# Post-processing function (optional). Its input is the main function's output; you can transform the data here or return a friendlier outputs format. This function's output is not cached.
def m4_post_run_bigquant_run(outputs):
return outputs
# Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
def m5_run_bigquant_run(input_1, input_2, input_3, decay):
    # Smooth the factor with linear decay weighting over the trailing `decay` days
    df = input_1.read_df()
    # The factor name is whatever feature key remains after dropping the metadata keys
    factor = list(set(input_2.read_pickle()).difference(['end_date', 'instruments', 'start_date']))[0]
    pvt = df.pivot(index='date', columns='instrument', values=factor)
    # Weights 1..decay with the newest observation heaviest; sum(range(decay+1)) normalizes them
    pvt = pvt.rolling(decay).apply(lambda x: sum([(i + 1) * xx for i, xx in enumerate(x)]) / sum(range(decay + 1)))
    result = pvt.unstack().reset_index().rename(columns={0: factor})
    ds = DataSource().write_df(result)
    return Outputs(data_1=ds, data_2=None, data_3=None)
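# Worked example of the linear-decay weighting above: with decay=4 the window
# weights are 1, 2, 3, 4 (newest day heaviest) and the normalizer is
# sum(range(5)) == 10, so a window [a, b, c, d] produces (a + 2*b + 3*c + 4*d) / 10.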
# Post-processing function (optional). Its input is the main function's output; you can transform the data here or return a friendlier outputs format. This function's output is not cached.
def m5_post_run_bigquant_run(outputs):
return outputs
# Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
def m8_run_bigquant_run(input_1, input_2, input_3, max_stock_weight, neutralized_type):
    # Convert the (optionally neutralized) factor into daily portfolio weights
    df = input_1.read_df()
    factor = list(set(input_3.read_pickle()).difference(['end_date', 'instruments', 'start_date']))[0]
    pvt = df.pivot(index='date', columns='instrument', values=factor)
    universe_dic = input_2.read_pickle()
    all_dates = sorted(universe_dic.keys())
    weights = {}
    for date in all_dates:
        alpha = pvt.loc[date, universe_dic[date]]
        if neutralized_type == 'market':
            # Market neutralization: demean the factor cross-sectionally
            alpha = alpha - alpha.mean()
        elif neutralized_type == 'industry':
            # Industry neutralization: demean the factor within each SW level-1 industry
            group_mean = df[df.date == date].groupby('industry_code', as_index=False).mean().rename(columns={factor: 'group_mean'})
            tmp = df[df.date == date].merge(group_mean, how='left', on='industry_code')
            tmp[factor] = tmp[factor] - tmp['group_mean']
            alpha = tmp.set_index('instrument')[factor].loc[universe_dic[date]]
        # Scale to unit gross exposure, cap the single-stock weight, then renormalize
        alpha_weight = alpha / alpha.abs().sum()
        alpha_weight = alpha_weight.clip(-max_stock_weight, max_stock_weight)
        alpha_weight = alpha_weight / alpha_weight.abs().sum()
        weights[date] = alpha_weight
    ds = DataSource().write_pickle(weights)
    return Outputs(data_1=ds, data_2=None, data_3=None)
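# Worked example of the clip-and-renormalize step above, with hypothetical demeaned
# factor values [3, -1, -2]: gross normalization gives [0.5, -1/6, -1/3]; with
# max_stock_weight=0.1 the clip yields [0.1, -0.1, -0.1], and renormalizing to
# sum(|w|) == 1 gives [1/3, -1/3, -1/3]. Note that a single clip-then-renormalize
# pass can push weights back above the cap, as it does here.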
# Post-processing function (optional). Its input is the main function's output; you can transform the data here or return a friendlier outputs format. This function's output is not cached.
def m8_post_run_bigquant_run(outputs):
return outputs
# Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
def m15_run_bigquant_run(input_1, input_2, input_3):
    # Compute three daily alpha return series from the weights and two return definitions:
    # Alpha0: weights are today's factor values; return: Close/Open - 1
    # Alpha1: weights are yesterday's factor values; return: Close/shift(Close, 1) - 1
    # Alpha2: weights are yesterday's factor values; return: Close/Open - 1
    # Statistically the market tends to open lower the next day on average, which is why alpha1's return is lower
    alpha_weights = input_1.read_pickle()
    ret_df = input_2.read_df()
    ret0_df = ret_df.pivot(index='date', columns='instrument', values='close_0/close_1-1')
    ret1_df = ret_df.pivot(index='date', columns='instrument', values='close_0/open_0-1')
    alpha0, alpha1, alpha2 = {}, {}, {}
    all_dates = sorted(alpha_weights.keys())
    w_prev = None
    for date in all_dates:
        w = alpha_weights[date]
        alpha0[date] = (ret1_df.loc[date, w.index] * w).sum()
        alpha1[date] = (ret0_df.loc[date, w_prev.index] * w_prev).sum() if w_prev is not None else 0.0
        alpha2[date] = (ret1_df.loc[date, w_prev.index] * w_prev).sum() if w_prev is not None else 0.0
        w_prev = w
    alpha0 = pd.Series(alpha0)
    alpha1 = pd.Series(alpha1)
    alpha2 = pd.Series(alpha2)
    alpha = pd.DataFrame({'alpha0': alpha0, 'alpha1': alpha1, 'alpha2': alpha2})
    ds = DataSource().write_df(alpha)
    return Outputs(data_1=ds, data_2=None, data_3=None)
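# Of the three series, alpha2 is the one closest to a tradable strategy: it applies
# yesterday's weights to today's open-to-close return. alpha0 applies same-day
# weights to same-day returns (a look-ahead upper bound), while alpha1 also carries
# the overnight close-to-close gap, which the comment above notes tends to lower it.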
# Post-processing function (optional). Its input is the main function's output; you can transform the data here or return a friendlier outputs format. This function's output is not cached.
def m15_post_run_bigquant_run(outputs):
return outputs
# Python entry function: input_1/2/3 map to the three input ports, data_1/2/3 to the three output ports
def m16_run_bigquant_run(input_1, input_2, input_3, booksize):
    import empyrical

    def calc_daily_turnover(alpha_weights):
        # One-way daily turnover: sum(|w_t - w_{t-1}|) / 2
        all_dates = sorted(alpha_weights.keys())
        last_date = None
        turnover = {}
        for date in all_dates:
            w = alpha_weights[date]
            w.name = 'w'
            w_prev = alpha_weights[last_date] if last_date is not None else pd.Series(0, index=w.index)
            w_prev.name = 'w_prev'
            tmp = pd.concat([w, w_prev], axis=1).fillna(0)
            turnover[date] = (tmp['w'] - tmp['w_prev']).abs().sum()
            last_date = date
        turnover = pd.Series(turnover)
        turnover /= 2
        return turnover

    alpha_df = input_1.read_df()  # same data as m15.data_1, which is wired to input_1
    # m8's weights are not wired to an input port, so read that module's output directly
    alpha_weights = m8.data_1.read_pickle()
    dailyPnL = alpha_df * booksize
    PnL = dailyPnL.groupby(dailyPnL.index.year).sum()
    # Annualized Sharpe from the per-year information ratio
    IR = dailyPnL.groupby(dailyPnL.index.year).mean() / dailyPnL.groupby(dailyPnL.index.year).std()
    sharpe = IR * np.sqrt(252)
    returns = dailyPnL.groupby(dailyPnL.index.year).sum() / booksize
    daily_turnover = calc_daily_turnover(alpha_weights)
    turnover = daily_turnover.groupby(daily_turnover.index.year).mean()
    fitness = sharpe * np.sqrt(returns.abs().apply(lambda x: x / turnover))
    # Margin: PnL per unit of traded value, in basis points
    margin = PnL.apply(lambda x: x / (daily_turnover.groupby(daily_turnover.index.year).sum() * booksize) * 10000)
    long_short_count = pd.DataFrame({date: ((w > 0).sum(), (w < 0).sum()) for date, w in alpha_weights.items()}).T
    long_short_count = long_short_count.rename(columns={0: 'long', 1: 'short'})
    long_short_count = long_short_count.groupby(long_short_count.index.year).sum()
    max_drawdown = dailyPnL.apply(lambda x: empyrical.max_drawdown(x / booksize))
    # Store each metric under its own key in a single DataSource
    dataset_ds = DataSource()
    output_store = dataset_ds.open_df_store()
    dailyPnL.to_hdf(output_store, key='dailyPnL')
    PnL.to_hdf(output_store, key='PnL')
    turnover.to_hdf(output_store, key='turnover')
    fitness.to_hdf(output_store, key='fitness')
    margin.to_hdf(output_store, key='margin')
    max_drawdown.to_hdf(output_store, key='max_drawdown')
    long_short_count.to_hdf(output_store, key='long_short_count')
    sharpe.to_hdf(output_store, key='sharpe')
    returns.to_hdf(output_store, key='returns')
    dataset_ds.close_df_store()
    return Outputs(data_1=dataset_ds)
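# Worked example of calc_daily_turnover above, with hypothetical weights: if
# yesterday's book is {A: 0.5, B: 0.5} and today's is {A: 0.5, C: 0.5}, then
# sum(|w - w_prev|) = 0 + 0.5 + 0.5 = 1.0, and dividing by 2 gives a one-way
# turnover of 0.5, i.e. half the book traded.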
# Post-processing function (optional). Its input is the main function's output; you can transform the data here or return a friendlier outputs format. This function's output is not cached.
def m16_post_run_bigquant_run(outputs):
return outputs
m1 = M.instruments.v2(
start_date='2010-01-01',
end_date='2018-09-30',
market='CN_STOCK_A',
instrument_list='',
max_count=0
)
m7 = M.cached.v3(
input_1=m1.data,
run=m7_run_bigquant_run,
post_run=m7_post_run_bigquant_run,
input_ports='',
params='{}',
output_ports=''
)
m2 = M.input_features.v1(
features='mean(amount_0,66)'
)
m3 = M.general_feature_extractor.v7(
instruments=m1.data,
features=m2.data,
start_date='',
end_date='',
before_start_days=0
)
m4 = M.cached.v3(
input_1=m3.data,
run=m4_run_bigquant_run,
post_run=m4_post_run_bigquant_run,
input_ports='',
    params="{'topN': 2000}",
output_ports='',
m_cached=False
)
m6 = M.input_features.v1(
features='-1*market_cap_0',
m_cached=False
)
m10 = M.general_feature_extractor.v7(
instruments=m1.data,
features=m6.data,
start_date='',
end_date=''
)
m11 = M.derived_feature_extractor.v3(
input_data=m10.data,
features=m6.data,
date_col='date',
instrument_col='instrument',
drop_na=False,
remove_extra_columns=False,
user_functions={}
)
m5 = M.cached.v3(
input_1=m11.data,
input_2=m6.data,
run=m5_run_bigquant_run,
post_run=m5_post_run_bigquant_run,
input_ports='',
    params="{'decay': 4}",
output_ports='',
m_cached=False
)
m9 = M.join.v3(
data1=m7.data_1,
data2=m5.data_1,
on='date,instrument',
how='inner',
sort=False
)
m8 = M.cached.v3(
input_1=m9.data,
input_2=m4.data_1,
input_3=m6.data,
run=m8_run_bigquant_run,
post_run=m8_post_run_bigquant_run,
input_ports='',
params="""{'max_stock_weight': 0.1,
'neuralized_type': 'industry'}""",
output_ports='',
m_cached=False
)
m12 = M.input_features.v1(
features="""close_0/open_0-1
close_0/close_1-1
"""
)
m13 = M.general_feature_extractor.v7(
instruments=m1.data,
features=m12.data,
start_date='',
end_date='',
before_start_days=0
)
m14 = M.derived_feature_extractor.v3(
input_data=m13.data,
features=m12.data,
date_col='date',
instrument_col='instrument',
drop_na=False,
remove_extra_columns=False,
user_functions={}
)
m15 = M.cached.v3(
input_1=m8.data_1,
input_2=m14.data,
run=m15_run_bigquant_run,
post_run=m15_post_run_bigquant_run,
input_ports='',
params='{}',
output_ports='',
m_cached=False
)
m16 = M.cached.v3(
input_1=m15.data_1,
run=m16_run_bigquant_run,
post_run=m16_post_run_bigquant_run,
input_ports='',
    params="{'booksize': 20000000}",
output_ports='',
m_cached=False
)
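# Pipeline overview: m1 (instruments) feeds everything; m2/m3 -> m4 builds the
# daily top-2000-by-amount universe; m6/m10/m11 -> m5 applies 4-day linear decay
# to the -1*market_cap_0 factor; m9 joins the decayed factor with m7's industry
# codes; m8 converts the factor into industry-neutralized, capped weights;
# m12/m13/m14 -> m15 computes the three alpha return series; m16 aggregates the
# performance metrics.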
[2021-01-07 17:57:03.732014] INFO: moduleinvoker: instruments.v2 started..
[2021-01-07 17:57:03.796396] INFO: moduleinvoker: cache hit
[2021-01-07 17:57:03.797548] INFO: moduleinvoker: instruments.v2 finished [0.065579s].
[2021-01-07 17:57:03.806212] INFO: moduleinvoker: cached.v3 started..
[2021-01-07 17:57:04.393790] INFO: moduleinvoker: cache hit
[2021-01-07 17:57:04.395506] INFO: moduleinvoker: cached.v3 finished [0.589297s].
[2021-01-07 17:57:04.493886] INFO: moduleinvoker: input_features.v1 started..
[2021-01-07 17:57:04.727444] INFO: moduleinvoker: cache hit
[2021-01-07 17:57:04.728822] INFO: moduleinvoker: input_features.v1 finished [0.23493s].
[2021-01-07 17:57:04.743588] INFO: moduleinvoker: general_feature_extractor.v7 started..
[2021-01-07 17:57:04.782003] INFO: moduleinvoker: cache hit
[2021-01-07 17:57:04.783877] INFO: moduleinvoker: general_feature_extractor.v7 finished [0.04032s].
[2021-01-07 17:57:04.786093] INFO: moduleinvoker: cached.v3 started..
[2021-01-07 17:57:16.411939] INFO: moduleinvoker: cached.v3 finished [11.625796s].
[2021-01-07 17:57:16.414519] INFO: moduleinvoker: input_features.v1 started..
[2021-01-07 17:57:16.608263] INFO: moduleinvoker: input_features.v1 finished [0.193734s].
[2021-01-07 17:57:16.623415] INFO: moduleinvoker: general_feature_extractor.v7 started..
[2021-01-07 17:57:16.637443] INFO: moduleinvoker: cache hit
[2021-01-07 17:57:16.639008] INFO: moduleinvoker: general_feature_extractor.v7 finished [0.015593s].
[2021-01-07 17:57:16.651983] INFO: moduleinvoker: derived_feature_extractor.v3 started..
[2021-01-07 17:57:21.258439] INFO: derived_feature_extractor: extraction finished -1*market_cap_0, 0.062s
[2021-01-07 17:57:22.265858] INFO: derived_feature_extractor: /y_2009, 95020
[2021-01-07 17:57:22.463722] INFO: derived_feature_extractor: /y_2010, 431567
[2021-01-07 17:57:23.896466] INFO: derived_feature_extractor: /y_2011, 511455
[2021-01-07 17:57:24.807046] INFO: derived_feature_extractor: /y_2012, 565675
[2021-01-07 17:57:25.452597] INFO: derived_feature_extractor: /y_2013, 564168
[2021-01-07 17:57:26.272828] INFO: derived_feature_extractor: /y_2014, 569948
[2021-01-07 17:57:26.820783] INFO: derived_feature_extractor: /y_2015, 569698
[2021-01-07 17:57:27.296012] INFO: derived_feature_extractor: /y_2016, 641546
[2021-01-07 17:57:28.059610] INFO: derived_feature_extractor: /y_2017, 743233
[2021-01-07 17:57:28.728097] INFO: derived_feature_extractor: /y_2018, 606288
[2021-01-07 17:57:30.134875] INFO: moduleinvoker: derived_feature_extractor.v3 finished [13.48287s].
[2021-01-07 17:57:30.143377] INFO: moduleinvoker: cached.v3 started..
[2021-01-07 17:58:14.616962] INFO: moduleinvoker: cached.v3 finished [44.473568s].
[2021-01-07 17:58:14.621124] INFO: moduleinvoker: join.v3 started..
[2021-01-07 17:58:33.166152] INFO: join: /data, rows=6002488/7833834, elapsed=15.882135s
[2021-01-07 17:58:36.127667] INFO: join: final row count: 6002488
[2021-01-07 17:58:36.454662] INFO: moduleinvoker: join.v3 finished [21.833521s].
[2021-01-07 17:58:36.457358] INFO: moduleinvoker: cached.v3 started..
[2021-01-07 18:01:09.089575] INFO: moduleinvoker: cached.v3 finished [152.632191s].
[2021-01-07 18:01:09.092141] INFO: moduleinvoker: input_features.v1 started..
[2021-01-07 18:01:09.107426] INFO: moduleinvoker: cache hit
[2021-01-07 18:01:09.109726] INFO: moduleinvoker: input_features.v1 finished [0.017567s].
[2021-01-07 18:01:09.295646] INFO: moduleinvoker: general_feature_extractor.v7 started..
[2021-01-07 18:01:09.674657] INFO: moduleinvoker: cache hit
[2021-01-07 18:01:09.676006] INFO: moduleinvoker: general_feature_extractor.v7 finished [0.380376s].
[2021-01-07 18:01:09.677743] INFO: moduleinvoker: derived_feature_extractor.v3 started..
[2021-01-07 18:01:09.683257] INFO: moduleinvoker: cache hit
[2021-01-07 18:01:09.684231] INFO: moduleinvoker: derived_feature_extractor.v3 finished [0.006485s].
[2021-01-07 18:01:09.685974] INFO: moduleinvoker: cached.v3 started..
[2021-01-07 18:01:39.497172] INFO: moduleinvoker: cached.v3 finished [29.811158s].
[2021-01-07 18:01:39.500677] INFO: moduleinvoker: cached.v3 started..
[2021-01-07 18:01:55.324050] INFO: moduleinvoker: cached.v3 finished [15.823345s].
alpha_df = m15.data_1.read_df()
T.plot((1 + alpha_df['alpha0']).cumprod())
T.plot((1 + alpha_df['alpha1']).cumprod())
T.plot((1 + alpha_df['alpha2']).cumprod())
All the metric results live in the m16 module; you can read them directly through the data source, with each metric stored under its own key:
m16.data_1.read_df(key='returns')
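For example, to inspect every stored metric in one pass (a minimal sketch that just loops over the keys written in m16 above):
for key in ['dailyPnL', 'PnL', 'turnover', 'fitness', 'margin',
            'max_drawdown', 'long_short_count', 'sharpe', 'returns']:
    print(key)
    print(m16.data_1.read_df(key=key).head())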