精品课程

回归法单因子测试源码

由 small_q 创建，最后由 small_q 更新，被 46 位用户浏览

import dai
import statsmodels.api as sm
import pandas as pd

# SQL that builds the regression panel: one row per (date, instrument) with
# the `total_market_cap` factor from cn_stock_factors inner-joined to a
# `returns` column derived from cn_stock_bar1d closes.
# NOTE(review): `m_lag(close,-1)` presumably reads the NEXT bar's close, which
# would make `returns` a one-day forward return -- confirm against DAI docs.
# NOTE(review): the subquery filters with `>=` while the outer query uses `>`;
# after the inner join only dates strictly after 2023-01-01 remain.
_FACTOR_PANEL_SQL = """
    pragma enable_pushdown_window;
    select a.date, a.instrument, a.total_market_cap, b.returns
    from cn_stock_factors AS a
    INNER JOIN (
        SELECT date, instrument, m_lag(close,-1)/close - 1 AS returns
        FROM cn_stock_bar1d
        WHERE date >= '2023-01-01'
    ) AS b
    ON a.date = b.date AND a.instrument = b.instrument
    where a.date>'2023-01-01'
"""

# Execute the query through DAI; the rest of the script treats the result of
# `.df()` as a pandas DataFrame.
_panel_query = dai.query(_FACTOR_PANEL_SQL)
factors = _panel_query.df()


# Clean the panel BEFORE standardizing.
# The previous version z-scored first and then filled NaNs only in the raw
# `total_market_cap` column, so `total_market_cap_z` kept its NaNs and passed
# them on to the regressions. It also used `factors[col].fillna(...,
# inplace=True)`, a chained in-place assignment that pandas warns about and
# that has no effect under Copy-on-Write.
# Rows missing either the factor or the return are dropped rather than filled
# with the whole-sample mean: that mean is computed from all dates, so filling
# with it leaks information across time into each cross-section.
factors = factors.dropna(subset=['total_market_cap', 'returns']).copy()

# Cross-sectional z-score: within each date, subtract the mean and divide by
# the sample standard deviation of that date's market caps.
_cap_by_date = factors.groupby('date')['total_market_cap']
factors['total_market_cap_z'] = (
    (factors['total_market_cap'] - _cap_by_date.transform('mean'))
    / _cap_by_date.transform('std')
)

# The z-score is undefined (NaN) when a date has a single stock or when every
# stock has the same market cap (std is NaN or 0); drop those rows so the
# regression input is NaN-free.
factors = factors[factors['total_market_cap_z'].notna()]
# Cross-sectional (per-date) regression of returns on the standardized factor:
#     returns = const + beta * total_market_cap_z + residual
# The fitted coefficients of every date are collected into `regression_df`,
# one row per date.

# Two parameters (intercept + slope) are estimated per date, so require at
# least three complete observations; smaller cross-sections are skipped.
_min_obs = 3

regression_results = []  # fitted params (one Series per regressed date)
regression_dates = []    # date of each entry in regression_results

for date, data in factors.groupby('date'):
    # sm.OLS defaults to missing='none' and does not filter NaNs itself, so
    # keep only rows where both the response and the regressor are present.
    sample = data[['returns', 'total_market_cap_z']].dropna()
    if len(sample) < _min_obs:
        continue

    y = sample['returns']
    # has_constant='add' always adds the intercept column; the default 'skip'
    # would silently omit it if the factor column happened to be constant,
    # changing the shape of the stored params.
    X = sm.add_constant(sample['total_market_cap_z'], has_constant='add')
    model = sm.OLS(y, X).fit()  # ordinary least squares

    regression_results.append(model.params)
    regression_dates.append(date)

# One row per date, columns `const` and `total_market_cap_z`. The index is the
# regression date (previously a bare 0..n-1 range), so the coefficient series
# can be aligned with the calendar.
regression_df = pd.DataFrame(regression_results)
regression_df.index = pd.Index(regression_dates, name='date')

# Summary statistics of the regression coefficients
# print(regression_df.describe())

\

标签

Python单因子模型