克隆策略

抽取基础因子数据¶

In [1]:

import numpy as np
import pandas as pd
import numpy as np
from cvxpy import *
import datetime
from sklearn.linear_model import LinearRegression 

start_date = '2015-10-01'      
end_date = '2019-02-01'

m1 = M.instruments.v2(
    start_date=start_date,
    end_date=end_date,
    market='CN_STOCK_A',
    instrument_list=''
)

m7 = M.input_features.v1(
    features="""
beta_csi500_180_0
ta_mom_60_0
log(market_cap_0+0.0001)
west_eps_ftm_0/close_0
fs_net_income_0/market_cap_0
fs_eps_0/close_0
volatility_240_0
return_20
swing_volatility_240_0
fs_net_profit_qoq_0
fs_net_profit_yoy_0
fs_net_profit_0
west_netprofit_ftm_0
fs_common_equity_0/market_cap_0
(fs_non_current_liabilities_0+market_cap_0)/market_cap_0
fs_total_liability_0/(fs_current_assets_0+fs_non_current_assets_0)
(fs_common_equity_0+fs_non_current_liabilities_0)/fs_common_equity_0
turn_0
"""
)

m8 = M.general_feature_extractor.v7(
    instruments=m1.data,
    features=m7.data,
    start_date='',
    end_date='',
    before_start_days=0
)

m9 = M.derived_feature_extractor.v3(
    input_data=m8.data,
    features=m7.data,
    date_col='date',
    instrument_col='instrument',
    drop_na=False,
    remove_extra_columns=False
)
 
# the material we need to build the factors are stored in Material DataFrame 
materials = m9.data.read_df()
materials = materials.set_index('date')

[2019-03-11 09:36:54.473428] INFO: bigquant: instruments.v2 开始运行..

[2019-03-11 09:36:57.509338] INFO: bigquant: 命中缓存

[2019-03-11 09:36:57.511180] INFO: bigquant: instruments.v2 运行完成[3.037758s].

[2019-03-11 09:36:57.515463] INFO: bigquant: input_features.v1 开始运行..

[2019-03-11 09:36:57.523139] INFO: bigquant: 命中缓存

[2019-03-11 09:36:57.524690] INFO: bigquant: input_features.v1 运行完成[0.009229s].

[2019-03-11 09:36:57.572555] INFO: bigquant: general_feature_extractor.v7 开始运行..

[2019-03-11 09:36:59.707922] INFO: bigquant: 命中缓存

[2019-03-11 09:36:59.709594] INFO: bigquant: general_feature_extractor.v7 运行完成[2.137048s].

[2019-03-11 09:36:59.714128] INFO: bigquant: derived_feature_extractor.v3 开始运行..

[2019-03-11 09:37:01.608406] INFO: bigquant: 命中缓存

[2019-03-11 09:37:01.610875] INFO: bigquant: derived_feature_extractor.v3 运行完成[1.896735s].

构建衍生因子¶

In [2]:

# 构建九大类因子里面的所有小因子
temporary_storage_dict = dict()
temporary_storage_dict['STOM'] = materials.groupby('instrument')['turn_0'].rolling(21).sum().apply(lambda x:np.log(x)).reset_index().set_index('date').rename(columns={'turn_0':'STOM'})
temporary_storage_dict['STOQ'] = temporary_storage_dict['STOM'].groupby('instrument').apply(lambda x:np.exp(x['STOM'])).rolling(3).mean().apply(lambda x:np.log(x)).reset_index().set_index('date').rename(columns={'STOM':'STOQ'})
temporary_storage_dict['STOA'] = temporary_storage_dict['STOM'].groupby('instrument').apply(lambda x:np.exp(x['STOM'])).rolling(12).mean().apply(lambda x:np.log(x)).reset_index().set_index('date').rename(columns={'STOM':'STOA'})

temporary_storage_dict['MLEV'] = materials[['instrument','(fs_non_current_liabilities_0+market_cap_0)/market_cap_0']].rename(columns={'(fs_non_current_liabilities_0+market_cap_0)/market_cap_0':'MLEV'})
temporary_storage_dict['DTOA'] = materials[['instrument','fs_total_liability_0/(fs_current_assets_0+fs_non_current_assets_0)']].rename(columns={'fs_total_liability_0/(fs_current_assets_0+fs_non_current_assets_0)':'DTOA'})
temporary_storage_dict['BLEV'] = materials[['instrument','(fs_common_equity_0+fs_non_current_liabilities_0)/fs_common_equity_0']].rename(columns={'(fs_common_equity_0+fs_non_current_liabilities_0)/fs_common_equity_0':'BLEV'})
 
temporary_storage_dict['Value'] = materials[['instrument','fs_common_equity_0/market_cap_0']].rename(columns={'fs_common_equity_0/market_cap_0':'Value'})
 
temporary_storage_dict['Growth'] = materials[['instrument','fs_net_profit_yoy_0']].rename(columns={'fs_net_profit_yoy_0':'Growth'})
 
temporary_storage_dict['EPIBS'] = materials[['instrument','west_eps_ftm_0/close_0']].rename(columns={'west_eps_ftm_0/close_0':'EPIBS'})
temporary_storage_dict['ETOP'] = materials[['instrument','fs_net_income_0/market_cap_0']].rename(columns={'fs_net_income_0/market_cap_0':'ETOP'})
temporary_storage_dict['CETOP'] = materials[['instrument','fs_eps_0/close_0']].rename(columns={'fs_eps_0/close_0':'CETOP'})

temporary_storage_dict['DASTD'] = materials[['instrument','volatility_240_0']].rename(columns={'volatility_240_0':'DASTD'})
temporary_storage_dict['GMRA'] = materials.groupby('instrument')['return_20'].rolling(12).apply(lambda x:np.log(1+np.max(x))-np.log(1+np.min(x))).reset_index().set_index('date').rename(columns={'return_20':'GMRA'})

temporary_storage_dict['Size'] = materials[['instrument','log(market_cap_0+0.0001)']].rename(columns={'log(market_cap_0+0.0001)':'Size'})

temporary_storage_dict['Momentum'] = materials[['instrument','ta_mom_60_0']].rename(columns={'ta_mom_60_0':'Momentum'})

temporary_storage_dict['Beta'] = materials[['instrument','beta_csi500_180_0']].rename(columns={'beta_csi500_180_0':'Beta'})

for i in list(set(temporary_storage_dict.keys())):
    temporary_storage_dict[i] = temporary_storage_dict[i].reset_index()

    
initiate_factor_df  = temporary_storage_dict['STOM']  # 这里我们先取STOM因子数据
tmp_raw_factor_name = list(set(temporary_storage_dict.keys()).difference(['STOM']))
for i in tmp_raw_factor_name:
    initiate_factor_df = initiate_factor_df.merge(temporary_storage_dict[i], ) # 共18列

去极值处理¶

In [3]:

# 去极值处理
def winsorize(df):
    factors = set(df.columns).difference(['date', 'instrument'])
    for factor in factors:
        if df[df[factor].isnull()==True].shape[0] == len(df):
            continue
        median = np.median(df[factor])
        MAD = np.mean(np.abs(df[factor]) - median)
        df[factor][df[factor]>median+5*MAD] = median+5*MAD  # 剔除偏离中位数5倍以上的数据
        df[factor][df[factor]<median-5*MAD] = median-5*MAD
    return df

# 截面数据去极值,返回日频因子数据
extrema_remove_df = initiate_factor_df.groupby('date').apply(winsorize)

中性化处理和标准化处理¶

In [4]:

# 读取行业数据和市值数据,进行行业中性化处理和市值中性化处理,并月度化
ins = list(extrema_remove_df.instrument.unique())
industry_info = D.history_data(ins, start_date, end_date, ['industry_sw_level1','market_cap_float'])   
industry_info['market_cap_float'] = np.log10(industry_info['market_cap_float'])  # 对市值因子取对数

industry_df = industry_info[(industry_info.industry_sw_level1!=0)&(industry_info.industry_sw_level1!=-1)].set_index('date')  # 取出其中有行业分类的条目，如果行业划分是0或者-1就算了，代表无划分

industry_df = industry_df.groupby('instrument').apply(lambda x: x.resample('M', how='last'))  
industry_df.index = industry_df.index.droplevel(0)
industry_df.reset_index(inplace=True)

In [5]:

# 风格因子数据转换成月度，并与市值行业数据的拼接， 并把行业因子转换成dummy数据 
extrema_remove_df.set_index('date', inplace=True)
extrema_remove_df = extrema_remove_df.groupby('instrument').apply(lambda x: x.resample('M', how='last'))  # pandas 处理时间序列数据，将原始数据转换为月末数据
extrema_remove_df.index = extrema_remove_df.index.droplevel(0)
extrema_remove_df.reset_index(inplace=True)

extrema_remove_df = extrema_remove_df.merge(industry_df, on=['date', 'instrument']) # 风格因子数据和市值行业数据拼接

# 截面缺失值数据使用行业均值填充
extrema_remove_df = extrema_remove_df.groupby(['date', 'industry_sw_level1']).apply(lambda t: t.fillna(np.mean(t, axis=0)))
extrema_remove_df.reset_index(inplace=True, drop=True)

# 行业属性数据哑变量
dummy_industry_df = pd.get_dummies(extrema_remove_df['industry_sw_level1'], prefix='industry_prefix')   

# 哑变量与截面数据进行横向拼接
merge_df = pd.concat([extrema_remove_df, dummy_industry_df], axis=1).sort_values(['date', 'instrument'])  # 原始的行业因子那列应该还在

In [6]:

# 行业中性化处理和市值中性化处理
def calcu_single_factor(df, factor):
    industry_factor = [i for i in df.columns if i[:15]=='industry_prefix']
    # 对市值因子和行业因子进行回归
    xvars = ['market_cap_float'] + industry_factor
    used_factors = xvars + [factor] + ['instrument']
    
    used_factors_df = df[used_factors]
    used_factors_df = used_factors_df[~used_factors_df.isnull().T.any().T]
    if len(used_factors_df) == 0:
        return None
    X = used_factors_df[xvars]
    y = used_factors_df[factor]
    reg = LinearRegression()
    try:
        reg.fit(X,y)   # 将行业因子因子和市值因子对特定因子作回归
        res = y-reg.predict(X)
        used_factors_df[factor] = res
    except ValueError as e:
        used_factors_df[factor] = np.nan 

    return used_factors_df[['instrument',factor]]
 
factor_data_after_neutralize = pd.DataFrame(columns=['date','instrument'])

# 标准化处理
def standardlize(x):
    return (x-x.mean())/x.std()

detail_style_factor = [m for m in initiate_factor_df.columns if m not in ['date','instrument','market_cap_float']]  

# 对风格因子进行市值中性化处理,共计16个风格因子
for factor in detail_style_factor: 
    result_temp = merge_df.groupby('date').apply(lambda x: calcu_single_factor(x, factor)).reset_index().drop('level_1',axis=1)
    factor_data_after_neutralize = factor_data_after_neutralize.merge(result_temp, on=['date', 'instrument'], how='outer')
    
for factor in detail_style_factor:
    factor_data_after_neutralize[factor] = factor_data_after_neutralize.groupby('date')[factor].apply(standardlize)

# 用0值填充标准化后的缺失值 
factor_data_after_standard = factor_data_after_neutralize.fillna(0)

获取股票价格数据¶

In [7]:

# 获取股票价格并月度化
start = factor_data_after_standard.date.min().strftime('%Y-%m-%d')   
end = factor_data_after_standard.date.max().strftime('%Y-%m-%d')      
ins = list(set(factor_data_after_standard.instrument))    

price = DataSource('bar1d_CN_STOCK_A').read(instrument=ins, start_date=start, end_date=end, fields=['close'])    
price.rename(columns={'close':'price'}, inplace=True)
price = price.set_index('date').groupby('instrument').apply(lambda x: x.resample('M', how='last'))    
price.index = price.index.droplevel(0)
price.reset_index(inplace=True)

In [8]:

# 通过价格计算收益率并合成大类风格因子
tmp_factor = [m for m in merge_df.columns if m not in ['market_cap_float','industry_sw_level1']]
industry_prefix_factor = list(set(tmp_factor).difference(set(detail_style_factor)))  # industry_prefix_factor  是date instrument 和行业因子,共计30个因子

# 构建一个总的数据表, 合并行业因子和价格
monthly_style_and_industry_factor = factor_data_after_standard.merge(price, on=['date', 'instrument'], how='inner')
monthly_style_and_industry_factor = monthly_style_and_industry_factor.merge(merge_df[industry_prefix_factor], on=['date', 'instrument'], how='inner').set_index('date')  
 
# 计算出收益率
monthly_style_and_industry_factor.reset_index(inplace=True)
 
monthly_price_df = monthly_style_and_industry_factor.pivot_table(values='price',columns='instrument',index='date')
monthly_style_and_industry_factor = monthly_style_and_industry_factor.merge(monthly_price_df.pct_change().shift(-1).unstack(
                              ).to_frame('return').reset_index(),on=['date','instrument'])
monthly_style_and_industry_factor = monthly_style_and_industry_factor.dropna()

# 小因子等权重合并成大因子, 这里未来可以采取IC_IR法
monthly_style_and_industry_factor['Volatility'] = (monthly_style_and_industry_factor['DASTD']+monthly_style_and_industry_factor['GMRA'])/2
monthly_style_and_industry_factor['EarningYields'] = (monthly_style_and_industry_factor['EPIBS']+monthly_style_and_industry_factor['ETOP']+monthly_style_and_industry_factor['CETOP'])/3
monthly_style_and_industry_factor['Leverage'] = (monthly_style_and_industry_factor['MLEV']+monthly_style_and_industry_factor['BLEV']+monthly_style_and_industry_factor['DTOA'])/3
monthly_style_and_industry_factor['Liquidity'] = (monthly_style_and_industry_factor['STOM']+monthly_style_and_industry_factor['STOQ']+monthly_style_and_industry_factor['STOA'])/3
monthly_style_and_industry_factor.drop(['DASTD','GMRA','EPIBS','ETOP','CETOP','MLEV','BLEV','DTOA','STOM','STOQ','STOA'], axis=1, inplace=True)

计算股票特质收益率¶

In [9]:

# 回归(这里包括九大风格因子和行业因子), 通过回归计算出每个股票的残差
style_factor = ['Value','Growth','Size','Beta','Momentum','Volatility','EarningYields','Leverage','Liquidity']

def calcu_factor_specified_return(df):
    xvars = list(set(df.keys()).difference(['date','instrument','return','price']))
    X = df[xvars]
    y = df['return']
    reg = LinearRegression()
    try:
        reg.fit(X,y)
        res = y-reg.predict(X)
        df['specified_return'] = res
    except ValueError as e:
        df['specified_return'] = np.nan 

    return df

residual_ret = monthly_style_and_industry_factor.groupby('date').apply(lambda x: calcu_factor_specified_return(x))
residual_ret = residual_ret[['instrument', 'date', 'specified_return']]    # 一致的
residual_ret = residual_ret.sort_values('date').set_index('date')

计算因子收益率¶

In [46]:

len(all_factor_name)

Out[46]:

In [38]:

# 对收益率序列做回归,这里包括九大风格因子和行业因子 
C = dict()
end_month_date = list(set(monthly_style_and_industry_factor.date))
end_month_date.sort()
 
# 全部因子，包括28个行业因子和9个大类风格因子
all_factor_name = ['industry_prefix_630000','industry_prefix_110000','industry_prefix_410000','industry_prefix_620000','industry_prefix_610000','Leverage',
 'industry_prefix_220000','industry_prefix_360000','industry_prefix_420000','industry_prefix_720000','Size',
 'industry_prefix_350000','industry_prefix_330000','Value','industry_prefix_480000','Volatility','Liquidity','industry_prefix_640000','industry_prefix_280000',
 'industry_prefix_340000','industry_prefix_650000','Growth','industry_prefix_710000','Momentum','industry_prefix_240000','industry_prefix_510000','Beta','industry_prefix_430000','industry_prefix_370000',
 'industry_prefix_490000','industry_prefix_730000','EarningYields','industry_prefix_270000',
 'industry_prefix_450000','industry_prefix_210000','industry_prefix_460000','industry_prefix_230000']
 
# 计算出风格因子收益率 
for dt in end_month_date:  
    temp = monthly_style_and_industry_factor[monthly_style_and_industry_factor['date'] == dt]  
    X = np.array(temp[all_factor_name])
    Y = temp['return'].values   
    reg = LinearRegression()
    try:
        reg.fit(X, Y)
        C[dt] = reg.coef_
    except ValueError as e:
        C[dt] = np.nan
        
common_factor_ret = pd.DataFrame.from_dict(C, orient='index')
common_factor_ret.columns = all_factor_name
common_factor_ret = common_factor_ret.sort_index()   

# 计算出风格因子和行业因子的风险协方差矩阵,默认配置参数为36个月
special_risk = residual_ret.groupby('instrument').apply(lambda x: x['specified_return'].fillna(0).rolling(36).var()).reset_index()  

common_risk = common_factor_ret.rolling(36).cov()

计算基准在行业上的风险敞口¶

In [39]:

# 基准在行业上的暴露
def get_bm_industry_exposure(start_date='2019-01-01', end_date='2019-03-08'):
    
    index = DataSource('index_element_weight').read(start_date=start_date, end_date=end_date)
    hs300_df = index[index['instrument_index'] == '000300.HIX']

    industry_df = DataSource('industry_CN_STOCK_A').read(start_date=start_date, end_date=end_date, fields=['industry_sw_level1'])
    hs300_industry_df = pd.merge(hs300_df, industry_df, how='inner', on=['date', 'instrument'])

    def get_weight(tmp):
        return tmp[['industry_sw_level1', 'weight']].groupby('industry_sw_level1').sum()

    bm_weight = hs300_industry_df.groupby(['date']).apply(get_weight).reset_index() 
    return bm_weight

bm_industry_exposure = get_bm_industry_exposure(start_date=start_date, end_date=end_date)


# 基准在风格因子上的暴露
def get_bm_component(start_date='2019-01-01', end_date='2019-03-08'):
    index = DataSource('index_element_weight').read(start_date=start_date, end_date=end_date)
    hs300_df = index[index['instrument_index'] == '000300.HIX']
    return hs300_df

bm_component = get_bm_component(start_date=start_date, end_date=end_date)

组合优化求解¶

In [40]:

point_date = '2019-01-21'  

# 每月最后一天 
natural_month_date = [dt for dt in end_month_date if dt<=pd.Timestamp(point_date)][-1]
natural_month_date = natural_month_date.strftime('%Y-%m-%d')

# 交易日
trading_calendar_date = [dt for dt in sorted(list(set(bm_component['date']))) if dt<=pd.Timestamp(point_date)][-1]  
trading_calendar_date = trading_calendar_date.strftime('%Y-%m-%d')

 
special_bm_industry_exposure = bm_industry_exposure[bm_industry_exposure['date'] == trading_calendar_date
                                               ][['industry_sw_level1','weight']].set_index('industry_sw_level1')

# 组合优化
special_risk = special_risk.dropna()
instrument_name = list(set(special_risk.instrument[special_risk['date'] == natural_month_date]))   

n = len(instrument_name) # 股票数量
m = len(common_factor_ret.columns) # 因子数量,包含风格因子和行业因子
 
mu = monthly_price_df[list(set(monthly_price_df.columns).intersection(instrument_name))
                     ].pct_change().fillna(0).rolling(36).mean().ix[natural_month_date]

mu.sort_index(inplace=True) # 将mu按股票顺序排序
 
mu = np.array(mu)

# 计算共同风险因子协方差矩阵
sigma_tilde = np.array(common_risk.ix[natural_month_date, :])
 
# 计算特征因子协方差矩阵
d = np.diag(special_risk[special_risk['date'] == natural_month_date].set_index('instrument')['specified_return'].sort_index().values)
 
# 下面我们计算因子在股票上的载荷
special_factor_df = monthly_style_and_industry_factor[monthly_style_and_industry_factor['date'] == natural_month_date] 
F1 = special_factor_df
F1 = F1.set_index('instrument')
F1 = F1.ix[instrument_name, :]
F1 = F1.reset_index()
F1 = F1.sort_values('instrument')
F = np.array(F1[all_factor_name])
 
w = Variable(n)  # 权重, 形如（3525 ,1）
gamma = Parameter(nonneg=True) # 风险厌恶系数 
Lmax = Parameter() # 杠杆
industry_factor = [i for i in F1.columns if i[:15] == 'industry_prefix']  

right_bm_weight = pd.DataFrame(special_bm_industry_exposure, index= [int(i[-6:]) for i in industry_factor] )
h = right_bm_weight.weight/100 
special_bm_component = bm_component[bm_component['date'] == trading_calendar_date]
w_b = list(pd.DataFrame(special_bm_component.set_index('instrument') ,index=instrument_name).fillna(0)['weight']/100)

# 好像暂时不需要这个
special_style_factor_df = special_factor_df[['date','instrument','Value',
                                     'Growth','Size','Beta','Momentum','Volatility','EarningYields','Leverage','Liquidity']]
benchmark_factor_df = special_style_factor_df[special_style_factor_df['instrument'].isin(list(special_bm_component.instrument))]

tmp_merge_df = benchmark_factor_df.merge(special_bm_component, how='inner', on='instrument')
tmp_merge_df['weight'] = tmp_merge_df['weight'] / 100

for f in style_factor:
    tmp_merge_df[f] = tmp_merge_df[f] * tmp_merge_df['weight']

factor_risk_exp = np.array(tmp_merge_df[style_factor].sum())

# 行业中性
H = np.array(F1[industry_factor])
h = np.array(h)   
h = np.array([round(i, 10) for i in h]) # 精度控制

# 权重归一化
deviate = h.sum()-1
if deviate > 0 :
    h[h.argmax()] = h[h.argmax()] - np.abs(deviate)
elif deviate < 0:
    h[h.argmin()] = h[h.argmin()] + np.abs(deviate)
assert h.sum() == 1, 'WARING:权重之和不为1！'

# 风格中性
w_b = np.array(w_b)

n_factor = len(style_factor)  # 风格因子数量
X = np.array(F1[style_factor])   # 全部股票在风格因子上的暴露,格式形如（3000，3）

ret = mu.T*w  # 收益
risk = quad_form(w, F.dot(sigma_tilde).dot(F.T) + d)

In [41]:

#  组合优化   
t0 = datetime.datetime.now() 

# 确定优化目标，目前支持最大化收益、最小化风险、最大化风险调整收益
target = 'maximize_risk_adjusted_return'
 
if target == 'maximize_return':
    _obj = Maximize(ret)
elif target == 'minimize_risk':
    _obj =  -1 * Maximize(gamma*risk)
elif target == 'maximize_risk_adjusted_return':
    _obj = Maximize(ret - gamma*risk)

print('优化目标为： ', target)

constraint = [sum(w) == 1,   
                w >= 0,
                w <= 0.1,     
                w.T * H == h.T,
                (w.T-w_b.T) * X == np.zeros(len(style_factor))  
               ]

prob = Problem(_obj, constraint)
print("prob is DCP:",  prob.is_dcp()) # Problem must follow DCP rules.
Lmax.value = 1  
gamma.value = 0.1

try:
    prob.solve()
except Exception as e:
    print(e)

print("optimal value with OSQP:", prob.value)
print(prob.status)

优化目标为：  maximize_risk_adjusted_return
prob is DCP: True
optimal value with OSQP: 0.01130156690179871
optimal

验证优化结果¶

In [45]:

print('最大权重数值为： ', w.value.max(), '\n')
print('最小权重数值为： ', w.value.min(), '\n')
print('权重之和为： ', w.value.sum(), '\n')

portfolio_industry_risk = np.reshape(w.value.T, (d.shape[0], 1)).T.dot(H)

base_info_industry_sw = DataSource('basic_info_IndustrySw').read(start_date=start_date, end_date=end_date)
one_level_industry_info = base_info_industry_sw[base_info_industry_sw.industry_sw_level == 1]
industry_map = dict(zip(one_level_industry_info['code'], one_level_industry_info['name']))

T.plot(pd.DataFrame({'benchmark': h.T.tolist(), 'portfolio': portfolio_industry_risk.tolist()[0]},
                    index= [industry_map[i[-6:]] for i in industry_factor]
                   ), chart_type='column', title='行业风险敞口暴露')

portfolio_factor_risk = (w.value.T-w_b.T).dot(X).tolist()
portfolio_factor_risk = w.value.T.dot(X).tolist()
benchmark_factor_risk =  w_b.T.dot(X).tolist()
T.plot(pd.DataFrame({'benchmark': benchmark_factor_risk , 'portfolio': portfolio_factor_risk}, index=style_factor
                   ), chart_type='column', title='因子风险敞口暴露')

print('optimal run cost:', (datetime.datetime.now() - t0).seconds)

最大权重数值为：  0.09999999999989061 

最小权重数值为：  -1.7734994811935933e-13 

权重之和为：  0.9999999999357974

optimal run cost: 976

In [52]:

modify_style_factor_exposure = np.zeros(len(style_factor))
modify_style_factor_exposure[0] = 0.2 
constraint = [sum(w) == 1,   
                w >= 0,
                w <= 0.1,     
                w.T * H == h.T,
                (w.T-w_b.T) * X == modify_style_factor_exposure
               ]

prob = Problem(_obj, constraint)
print("prob is DCP:",  prob.is_dcp()) # Problem must follow DCP rules.
Lmax.value = 1  
gamma.value = 0.1

try:
    prob.solve()
except Exception as e:
    print(e)

print("optimal value with OSQP:", prob.value)
print(prob.status)  

print('最大权重数值为： ', w.value.max(), '\n')
print('最小权重数值为： ', w.value.min(), '\n')
print('权重之和为： ', w.value.sum(), '\n')

portfolio_industry_risk = np.reshape(w.value.T, (d.shape[0], 1)).T.dot(H)

base_info_industry_sw = DataSource('basic_info_IndustrySw').read(start_date=start_date, end_date=end_date)
one_level_industry_info = base_info_industry_sw[base_info_industry_sw.industry_sw_level == 1]
industry_map = dict(zip(one_level_industry_info['code'], one_level_industry_info['name']))

T.plot(pd.DataFrame({'benchmark': h.T.tolist(), 'portfolio': portfolio_industry_risk.tolist()[0]},
                    index= [industry_map[i[-6:]] for i in industry_factor]
                   ), chart_type='column', title='行业风险敞口暴露')

portfolio_factor_risk = (w.value.T-w_b.T).dot(X).tolist()
portfolio_factor_risk = w.value.T.dot(X).tolist()
benchmark_factor_risk =  w_b.T.dot(X).tolist()
T.plot(pd.DataFrame({'benchmark': benchmark_factor_risk , 'portfolio': portfolio_factor_risk}, index=style_factor
                   ), chart_type='column', title='因子风险敞口暴露')

print('optimal run cost:', (datetime.datetime.now() - t0).seconds)

prob is DCP: True
optimal value with OSQP: 0.010383237872855902
optimal
最大权重数值为：  0.09999999999800815 

最小权重数值为：  6.591102408370145e-14 

权重之和为：  0.9999999999357474

optimal run cost: 3670