# Source code for single-factor testing using the regression method
# Created by small_q; last modified by small_q; viewed by 54 users
import dai
import statsmodels.api as sm
import pandas as pd
# Load one row per (date, instrument) after 2023-01-01: the total market cap
# from cn_stock_factors joined with a return computed from cn_stock_bar1d.
# NOTE(review): m_lag(close, -1) presumably reads the NEXT bar's close, which
# would make `returns` a one-day forward return -- confirm against the DAI docs.
_factor_sql = """
pragma enable_pushdown_window;
select a.date, a.instrument, a.total_market_cap, b.returns
from cn_stock_factors AS a
INNER JOIN (
SELECT date, instrument, m_lag(close,-1)/close - 1 AS returns
FROM cn_stock_bar1d
WHERE date >= '2023-01-01'
) AS b
ON a.date = b.date AND a.instrument = b.instrument
where a.date>'2023-01-01'
"""
_query_result = dai.query(_factor_sql)
factors = _query_result.df()  # materialise the query result as a pandas DataFrame
# Impute missing values BEFORE standardising, so that the z-scored column (the
# one used as the regressor) contains no NaN from missing raw factor values.
# The filled columns are assigned back rather than using fillna(inplace=True)
# on a column selection: that chained in-place form is deprecated in pandas and
# does not update `factors` when Copy-on-Write is enabled.
factors['total_market_cap'] = factors['total_market_cap'].fillna(factors['total_market_cap'].mean())
# NOTE(review): missing returns are replaced by the full-sample mean return;
# consider dropping those rows instead if that biases the regressions.
factors['returns'] = factors['returns'].fillna(factors['returns'].mean())
# Cross-sectional z-score of the factor: standardise within each date.
factors['total_market_cap_z'] = factors.groupby('date')['total_market_cap'].transform(lambda x: (x - x.mean()) / x.std())
# Run one cross-sectional OLS per trading date, regressing each stock's return
# on its standardised market-cap exposure. The fitted coefficients are
# collected in `regression_results`, one Series per date.
regression_results = []
for date, data in factors.groupby('date'):
    # statsmodels does not drop missing values by default, so remove rows with
    # a missing return or factor exposure before fitting.
    sample = data[['returns', 'total_market_cap_z']].dropna()
    if len(sample) < 2:
        # Intercept and slope cannot both be estimated from fewer than two
        # observations; skip this date.
        continue
    y = sample['returns']
    X = sm.add_constant(sample['total_market_cap_z'])  # add the intercept column
    model = sm.OLS(y, X).fit()  # ordinary least squares
    coefficients = model.params
    coefficients.name = date  # label the row so skipped dates stay identifiable
    regression_results.append(coefficients)
# One row per fitted date (indexed by date), one column per coefficient.
regression_df = pd.DataFrame(regression_results)
# Inspect summary statistics of the regression coefficients
# print(regression_df.describe())