构建行业中性化哑变量矩阵时,1月数据,跑10分钟都跑不出来原因是?
由bqn737zt创建,最终由small_q 被浏览 14 用户
#提取一级行业,可以获得5000多只股票的行业列表。
sql ='''
select *
from cn_stock_industry_component
where date between '2023-0-01' and '2023-01-07'
'''
import dai
ww = dai.query(sql).df()
www_uni = ww.drop_duplicates(subset='instrument')
www_uni
#获取cn_stock_bar1d表数据
sql = '''
select *
from cn_stock_bar1d
where date between '2023-01-01' and '2024-01-01'
'''
import dai
df5 = dai.query(sql).df()
df5
#获取Pb数据
sql ='''
select *
from cn_stock_valuation
where date between '2023-01-01' and '2024-01-01'
'''
import dai
pb = dai.query(sql).df()
pb
#将上面三个表拼接到一起
import pandas as pd
merge_df5_pb = pd.merge(df5,pb[['date','instrument','pb']],on=['date','instrument'])
merge_df5_pb
import pandas as pd
merge_df5_pb_www_uni = pd.merge(merge_df5_pb,www_uni[['instrument','industry_level1_name']],on=['instrument'])
merge_df5_pb_www_uni
#获取市值数据
sql ='''
select *
from cn_stock_valuation
where date between '2023-01-01' and '2024-01-01'
'''
import dai
mkt = dai.query(sql).df()
mkt
#将市值表拼接上
import pandas as pd
merge_df5_pb_www_uni_mkt = pd.merge(merge_df5_pb_www_uni,mkt[['date','instrument','total_market_cap']],on=['date','instrument'])
merge_df5_pb_www_uni_mkt
#为了市值中性化、行业中性化 ,先构造行业哑变量
df6 = merge_df5_pb_www_uni_mkt
import pandas as pd
df6_pb = df6.pivot(index='instrument',columns='date',values='pb').dropna()
df6_pb
import pandas as pd
df6_mkt = df6.pivot(index='instrument',columns='date',values='total_market_cap').dropna()
df6_mkt
#获取行业哑变量矩阵
# get_code_list函数定义
def get_code_list(m):
# 从all_stocks中筛选特定行业m的所有股票代码
return df6[df6['industry_level1_name'] == m]['instrument']
def industry_exposure(instrument):
df8 = pd.DataFrame(index=[x.lower() for x in instrument],columns=df6['industry_level1_name'])
for m in df8.columns:
temp = list(set(df8.index).intersection(set([x.lower() for x in get_code_list(m)])))
df8.loc[temp,m] = 1
return df8.fillna(0)
df7 = industry_exposure(df6)
df7
#后面进行中心化处理,目前这部分问题就是df7运行超过10分钟还没有结果,请问原因是什么?