from bigdatasource.api import DataSource
from biglearning.api import M
from biglearning.api import tools as T
from biglearning.module2.common.data import Outputs
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
# Parallel task execution
from joblib import Parallel, delayed
import copy
# Instrument universe for market CN_STOCK_A between 2021-01-01 and
# 2023-03-31. NOTE(review): an empty instrument_list presumably selects the
# whole market -- confirm against the platform documentation.
m1 = M.instruments.v2(
    start_date='2021-01-01',
    end_date='2023-03-31',
    market='CN_STOCK_A',
    instrument_list='',
    max_count=0
)
# Feature aliases for return_0, close_0 and open_0. The original list declared
# "收盘=close_0" twice; the redundant duplicate line has been removed.
m2 = M.input_features.v1(
    features="""
收益=return_0
收盘=close_0
开盘=open_0
"""
)
# Extract the base features for the instruments selected above.
# NOTE(review): before_start_days=500 presumably requests extra history ahead
# of the start date -- confirm against the platform documentation.
m3 = M.general_feature_extractor.v7(
    instruments=m1.data,
    features=m2.data,
    start_date='',
    end_date='',
    before_start_days=500
)
# Evaluate the aliased feature expressions; rows with NaN and extra columns
# are kept (drop_na=False, remove_extra_columns=False).
m4 = M.derived_feature_extractor.v3(
    input_data=m3.data,
    features=m2.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False,
    user_functions={}
)
# Restrict to the listed boards, non-ST ('正常') and non-delisted ('非退市')
# stocks.
m5 = M.chinaa_stock_filter.v1(
    input_data=m4.data,
    index_constituent_cond=['全部'],
    board_cond=['上证主板', '深证主板', '创业板'],
    industry_cond=['全部'],
    st_cond=['正常'],
    delist_cond=['非退市'],
    output_left_data=False
)
# Materialise the filtered panel as a DataFrame for the factor computation.
df = m5.data.read()
df
def calc_market_return(df):
    """Attach the mean of ``收益`` as a constant ``市场收益`` column.

    Meant to be called through ``groupby('date').apply`` so that each call
    sees the rows of one date and the mean is the cross-sectional average.

    Args:
        df: DataFrame with a ``收益`` column.

    Returns:
        A copy of ``df`` with an extra ``市场收益`` column. The input is not
        modified: pandas does not support functions that mutate the group
        object passed in by ``groupby.apply``.
    """
    out = df.copy()
    out['市场收益'] = df['收益'].mean()
    return out
# Per-date market return. group_keys=False keeps the original row index:
# pandas >= 2.0 would otherwise prepend the 'date' key to the index, leaving
# 'date' as both an index level and a column for the later groupby calls.
df = df.groupby('date', group_keys=False).apply(calc_market_return)
# factor_1 = |market - stock| / (|market| + |stock| + 0.1); the 0.1 term keeps
# the denominator strictly positive.
df['factor_1'] = np.abs(df['市场收益'] - df['收益']) / (
    np.abs(df['市场收益']) + np.abs(df['收益']) + 0.1
)
def calc_adj_fac(df, window=20):
    """Compute the weighted decision score and its rolling statistics.

    Meant to be called through ``groupby('instrument').apply``.
    NOTE(review): ``shift``/``rolling`` work in row order, so the rows of each
    instrument are assumed to be in chronological order -- confirm upstream.

    Columns added:
        决策分:      ``收盘 / previous 收盘 - 1``.
        加权决策分:  ``决策分 * factor_1``.
        惊恐收益:    rolling mean of ``加权决策分`` over ``window`` rows.
        惊恐波动:    rolling standard deviation of ``加权决策分`` over
                     ``window`` rows.
        前瞻收益_1:  ``next 收盘 / 收盘`` (a gross ratio, not a net return).

    Args:
        df: DataFrame with ``收盘`` and ``factor_1`` columns.
        window: Rolling window length in rows; defaults to the original 20.

    Returns:
        A copy of ``df`` with the columns above. The input is not modified:
        pandas does not support functions that mutate the group object passed
        in by ``groupby.apply``.
    """
    out = df.copy()
    close = out['收盘']
    out['决策分'] = close / close.shift() - 1
    out['加权决策分'] = out['决策分'] * out['factor_1']
    out['惊恐收益'] = out['加权决策分'].rolling(window).mean()
    out['惊恐波动'] = out['加权决策分'].rolling(window).std()
    # Forward-looking: the last row has no next close and becomes NaN.
    out['前瞻收益_1'] = close.shift(-1) / close
    return out
# Per-instrument factor columns. group_keys=False keeps the original row index
# (pandas >= 2.0 would otherwise add 'instrument' as an index level, clashing
# with the 'instrument' column when the index is reset later).
df = df.groupby('instrument', group_keys=False).apply(calc_adj_fac)
# Drop rows with any NaN: this removes each instrument's rolling warm-up rows
# and its last row, whose forward return is undefined.
df.dropna(inplace=True)
df
# Notebook sanity checks on the scalar types of the computed columns.
type(df['惊恐收益'].iloc[1])
type(df['前瞻收益_1'].iloc[1])
def calc_mul(df, instrument, window=20):
    """Rolling correlation of both factors with the forward return.

    Args:
        df: Panel DataFrame with ``instrument``, ``惊恐收益``, ``惊恐波动`` and
            ``前瞻收益_1`` columns. It is not modified.
        instrument: Value of the ``instrument`` column to select.
        window: Rolling window length in rows; defaults to the original 20.

    Returns:
        A new DataFrame holding only the rows of ``instrument``. The previous
        index is moved into a column by ``reset_index``. Two columns are
        added: ``惊恐收益_mul`` and ``惊恐波动_mul``, the rolling correlations
        of ``惊恐收益`` / ``惊恐波动`` with ``前瞻收益_1``.
    """
    # Filter before copying: the original copied the whole panel on every
    # call and only then selected one instrument.
    subset = df[df['instrument'] == instrument].copy()
    subset = subset.reset_index()
    forward = subset['前瞻收益_1']
    subset['惊恐收益_mul'] = subset['惊恐收益'].rolling(window).corr(forward)
    subset['惊恐波动_mul'] = subset['惊恐波动'].rolling(window).corr(forward)
    return subset
# Rolling factor/forward-return correlations, one parallel task per instrument.
# NOTE(review): every task receives the full panel; pre-splitting by
# instrument would reduce the data shipped to the workers.
lst = df.instrument.unique().tolist()
results = Parallel(n_jobs=-1)(delayed(calc_mul)(df, ins) for ins in lst)
# calc_mul resets the index, so every per-instrument frame is labelled
# 0..n-1. ignore_index=True gives the combined frame unique labels; otherwise
# the later groupby.apply calls must realign on duplicated labels.
temp = pd.concat(results, ignore_index=True)
temp
temp.dropna(inplace=True)
# Lag the correlations by one row per instrument, so the multiplier applied on
# a row was computed from the preceding row's window.
temp['惊恐收益_mul_adj'] = temp.groupby('instrument')['惊恐收益_mul'].shift()
temp['惊恐波动_mul_adj'] = temp.groupby('instrument')['惊恐波动_mul'].shift()
temp
temp_ = temp.copy()
# Removes each instrument's first remaining row, whose lagged value is NaN.
temp_.dropna(inplace=True)
# Scale each factor by its lagged correlation with the forward return.
temp_['惊恐收益'] = temp_['惊恐收益'] * temp_['惊恐收益_mul_adj']
temp_['惊恐波动'] = temp_['惊恐波动'] * temp_['惊恐波动_mul_adj']
temp_
df = temp_
# Split each factor into 7 quantile buckets labelled 0..6.
# NOTE(review): the quantiles are taken over the pooled panel (all dates
# together), not per date -- confirm this is the intended layering.
df['groups_1'] = pd.qcut(df['惊恐收益'], q=7, labels=False)
df['groups_2'] = pd.qcut(df['惊恐波动'], q=7, labels=False)
df = df[['date', 'instrument', '前瞻收益_1', '惊恐波动', '惊恐收益', 'groups_1', 'groups_2']]
def calc_return_1(df):
    """Attach the mean of ``前瞻收益_1`` as a constant ``return_1`` column.

    Meant to be called through ``groupby(['date', 'groups_1']).apply`` so the
    mean is the average forward return of one layer on one date.

    The original wrote the value to ``return_2``. That left ``return_1``
    undefined, and the later ``df['return_1'].cumprod()`` raised ``KeyError``.

    Args:
        df: DataFrame with a ``前瞻收益_1`` column.

    Returns:
        A copy of ``df`` with an extra ``return_1`` column. The input is not
        modified: pandas does not support functions that mutate the group
        object passed in by ``groupby.apply``.
    """
    out = df.copy()
    out['return_1'] = df['前瞻收益_1'].mean()
    return out
def calc_return_2(df):
    """Attach the mean of ``前瞻收益_1`` as a constant ``return_2`` column.

    Meant to be called through ``groupby(['date', 'groups_2']).apply`` so the
    mean is the average forward return of one layer on one date.

    Args:
        df: DataFrame with a ``前瞻收益_1`` column.

    Returns:
        A copy of ``df`` with an extra ``return_2`` column. The input is not
        modified: pandas does not support functions that mutate the group
        object passed in by ``groupby.apply``.
    """
    out = df.copy()
    out['return_2'] = df['前瞻收益_1'].mean()
    return out
# Average forward return per (date, layer) for each factor. group_keys=False
# keeps the original row index: pandas >= 2.0 would otherwise prepend the
# group keys, leaving 'date' as both an index level and a column.
plot_pnl = df.groupby(['date', 'groups_1'], group_keys=False).apply(calc_return_1)
plot_pnl = plot_pnl.groupby(['date', 'groups_2'], group_keys=False).apply(calc_return_2)
# Keep one row per (date, groups_1) pair; the layer return is identical on
# every row of the pair.
test_1 = plot_pnl.drop_duplicates(subset=['date', 'groups_1'])
def cal_cum_prod(df):
    """Attach ``equity_1``, the cumulative product of ``return_1``.

    The product runs in row order, so callers must pass rows sorted by date
    for the result to be an equity curve.

    Args:
        df: DataFrame with a ``return_1`` column of gross returns.

    Returns:
        A copy of ``df`` with an extra ``equity_1`` column. The input is not
        modified: pandas does not support functions that mutate the group
        object passed in by ``groupby.apply``.
    """
    out = df.copy()
    out['equity_1'] = df['return_1'].cumprod()
    return out
# Compound each layer's daily return into an equity curve. The rows are sorted
# by date first: after the per-instrument concat they are not in chronological
# order, and cumprod works in row order. group_keys=False keeps a flat index
# on pandas >= 2.0.
test_1 = (
    test_1.sort_values('date')
    .groupby('groups_1', group_keys=False)
    .apply(cal_cum_prod)
)
import plotly.graph_objects as go
# Create the line-chart figure.
fig = go.Figure()
# Add one line per layer. Each trace uses its own layer's dates for x, so x
# and y always have the same length even when a layer is empty on some dates.
for _layer in range(7):
    _curve = test_1[test_1['groups_1'] == _layer]
    fig.add_trace(go.Scatter(x=_curve['date'], y=_curve['equity_1'], mode='lines', name='group_' + str(_layer)))
# Set the chart title and axis labels.
fig.update_layout(title='惊恐收益因子分层净值', xaxis_title='日期', yaxis_title='PNL')
# Render the chart.
fig.show()
# Keep one row per (date, groups_2) pair for the second factor.
test_2 = plot_pnl.drop_duplicates(subset=['date', 'groups_2'])
def cal_cum_prod(df):
    """Attach ``equity_2``, the cumulative product of ``return_2``.

    This redefinition replaces the earlier ``cal_cum_prod`` that produced
    ``equity_1``. The product runs in row order, so callers must pass rows
    sorted by date for the result to be an equity curve.

    Args:
        df: DataFrame with a ``return_2`` column of gross returns.

    Returns:
        A copy of ``df`` with an extra ``equity_2`` column. The input is not
        modified: pandas does not support functions that mutate the group
        object passed in by ``groupby.apply``.
    """
    out = df.copy()
    out['equity_2'] = df['return_2'].cumprod()
    return out
# Compound each layer's daily return into an equity curve. The rows are sorted
# by date first because cumprod works in row order; group_keys=False keeps a
# flat index on pandas >= 2.0.
test_2 = (
    test_2.sort_values('date')
    .groupby('groups_2', group_keys=False)
    .apply(cal_cum_prod)
)
# Notebook spot check of one date.
test_2[test_2['date']=='2023-03-07']
import plotly.graph_objects as go
# Create the line-chart figure.
fig = go.Figure()
# Add one line per layer. The original used test_1's dates for x here; each
# trace now uses the dates of its own test_2 layer, so x and y stay aligned.
for _layer in range(7):
    _curve = test_2[test_2['groups_2'] == _layer]
    fig.add_trace(go.Scatter(x=_curve['date'], y=_curve['equity_2'], mode='lines', name='group_' + str(_layer)))
# Set the chart title and axis labels.
fig.update_layout(title='惊恐波动因子分层净值', xaxis_title='日期', yaxis_title='PNL')
# Render the chart.
fig.show()
# Notebook spot check of one date. NOTE(review): the name `re` shadows the
# standard-library module of the same name if it is imported later.
re = test_1[test_1['date']=='2023-02-22']
re