复制链接
克隆策略
In [1]:
import pandas as pd
import numpy as np
from biglearning.module2.common.data import Outputs
from zipline.finance.commission import PerOrder
import os
from bigdatasource.api import DataSource
from biglearning.api import M
from biglearning.api import tools as T
from biglearning.module2.common.data import Outputs

import warnings
warnings.filterwarnings('ignore')
In [2]:
sd = '2022-01-01'
ed = '2023-05-09'
In [3]:
df = DataSource('bar1m_CN_STOCK_A').read(start_date=sd,end_date=ed,instruments=['002362.SZA'])

3.1 “潮汐”的定义 我们观察个股分钟频成交量的高点与低点来定义“涨潮”与“退潮”, 具体如下: 1)剔除开盘和收盘数据,仅考虑日内分钟频数据,为了减小个别异 常点的影响,我们首先计算个股每分钟的成交量及其前后 4 分钟成交 量的总和(共 9 分钟),作为该分钟“邻域成交量”。 2)假设“邻域成交量”最高点发生在第 t 分钟,这一分钟称为“顶峰 时刻”。 3)第 5~t-1 分钟里,“邻域成交量”最低点发生在第 m 分钟,这一点 的邻域成交量为 Vm,收盘价为 Cm,这一分钟称为“涨潮时刻”,从 “涨潮时刻”到“顶峰时刻”的过程记为“涨潮”。 4)第 t+1~233 分钟里,”邻域成交量“最低点发生在第 n 分钟里,这 一点的邻域成交量为 Vn,收盘价为 Cn,这一分钟称为“退潮时刻”, 从“顶峰时刻”到“退潮时刻”的过程记为“退潮”。 5)从“涨潮时刻”到“退潮时刻”的全过程记为一次“潮汐”。

3.2 “潮汐”过程的价格变动速率 我们首先来考察“潮汐”过程的价格变动速率,进而构造“全潮汐” 因子,具体过程如下: 1)如上述定义,我们记“涨潮时刻”发生在第 m 分钟,收盘价为 Cm; “退潮时刻”发生在第 n 分钟,收盘价为 Cn。 2)则全部“潮汐”过程的价格变化率为(Cn-Cm)/Cm。 金融工程报告 7 敬请关注文后特别声明与免责条款 3)进而全“潮汐”过程的价格变动速率为(Cn-Cm)/Cm/(n-m),我们 将此作为每日投资者出售或购买股票意愿强烈程度的代理变量。 4)我们计算最近 20 个交易日的价格变动速率的平均值,记为“全潮 汐”因子。 接下来我们将对上述构建的“全潮汐”因子进行单因子测试,我们在 全 A 样本中按照月度频率进行测试,测试中对因子进行市值和行业正 交化处理,测试区间为 2013 年 4 月至 2022 年 2 月(下同)。因子表 现如下所示。

In [4]:
df
Out[4]:
instrument date open close low high amount volume
0 002362.SZA 2022-01-04 09:31:00 17.940001 17.940001 17.879999 18.290001 15909196.0 883802
1 002362.SZA 2022-01-04 09:32:00 17.910000 17.820000 17.820000 17.940001 3491968.0 195000
2 002362.SZA 2022-01-04 09:33:00 17.820000 17.700001 17.620001 17.820000 2648165.0 149300
3 002362.SZA 2022-01-04 09:34:00 17.700001 17.680000 17.610001 17.719999 3584689.0 202700
4 002362.SZA 2022-01-04 09:35:00 17.670000 17.730000 17.650000 17.730000 1862097.0 105400
... ... ... ... ... ... ... ... ...
77755 002362.SZA 2023-05-09 14:56:00 31.840000 31.900000 31.840000 31.900000 16287989.0 510800
77756 002362.SZA 2023-05-09 14:57:00 31.889999 31.690001 31.690001 31.900000 18603670.0 584700
77757 002362.SZA 2023-05-09 14:58:00 31.680000 31.680000 31.680000 31.680000 820913.0 25900
77758 002362.SZA 2023-05-09 14:59:00 31.680000 31.680000 31.680000 31.680000 0.0 0
77759 002362.SZA 2023-05-09 15:00:00 31.690001 31.690001 31.690001 31.690001 20950259.0 661100

77760 rows × 8 columns

In [5]:
df = df.head(240)
In [6]:
df['邻域'] = df['volume']  +df['volume'].shift()+df['volume'].shift(2)+df['volume'].shift(3)+df['volume'].shift(4)+df['volume'].shift(-1)+df['volume'].shift(-2)+df['volume'].shift(-3)+df['volume'].shift(-4)

df.reset_index(inplace=True,drop=True)

vs = df.loc[0,'volume']
cs = df.loc[0,'close']


df.dropna(inplace=True)
In [7]:
df.reset_index(inplace=True,drop=True)
In [8]:
df
Out[8]:
instrument date open close low high amount volume 邻域
0 002362.SZA 2022-01-04 09:35:00 17.670000 17.730000 17.650000 17.730000 1862097.0 105400 2234202.0
1 002362.SZA 2022-01-04 09:36:00 17.709999 17.889999 17.709999 17.889999 2150419.0 120600 1435300.0
2 002362.SZA 2022-01-04 09:37:00 17.879999 17.930000 17.879999 17.930000 3726458.0 208300 1516783.0
3 002362.SZA 2022-01-04 09:38:00 17.930000 18.000000 17.910000 18.000000 3662693.0 203800 1524383.0
4 002362.SZA 2022-01-04 09:39:00 18.000000 18.000000 18.000000 18.040001 2977605.0 165300 1395183.0
... ... ... ... ... ... ... ... ... ...
227 002362.SZA 2022-01-04 14:52:00 17.930000 17.930000 17.910000 17.940001 509059.0 28400 414500.0
228 002362.SZA 2022-01-04 14:53:00 17.930000 17.940001 17.920000 17.940001 503844.0 28100 446300.0
229 002362.SZA 2022-01-04 14:54:00 17.950001 17.959999 17.950001 17.980000 1011225.0 56300 430800.0
230 002362.SZA 2022-01-04 14:55:00 17.950001 17.940001 17.940001 17.959999 1269131.0 70700 345100.0
231 002362.SZA 2022-01-04 14:56:00 17.930000 17.920000 17.920000 17.950001 1007723.0 56200 604900.0

232 rows × 9 columns

In [23]:
df = DataSource('bar1m_CN_STOCK_A').read(start_date=sd,end_date=ed,instruments=['002362.SZA'])
df = df.head(240)
In [24]:
df.reset_index(inplace=True,drop=True)

vs = df.loc[0,'volume']
cs = df.loc[0,'close']
s = 0 
df['邻域'] = df['volume']  +df['volume'].shift()+df['volume'].shift(2)+df['volume'].shift(3)+df['volume'].shift(4)+df['volume'].shift(-1)+df['volume'].shift(-2)+df['volume'].shift(-3)+df['volume'].shift(-4)




df.dropna(inplace=True)
df.reset_index(inplace=True,drop=True)
idxmax = df['邻域'].idxmax()  #顶峰

if idxmax != 0:
    df_1 = df.loc[:idxmax]
    df_2 = df.loc[idxmax:]

    idxmin_m = df_1['邻域'].idxmin()
    idxmin_n = df_2['邻域'].idxmin()

    
    vm = df.loc[idxmin_m,'volume']
    cm = df.loc[idxmin_m,'close']
    vn = df.loc[idxmin_n,'volume']
    cn = df.loc[idxmin_n,'close']

    df['vm'] = vm
    df['cm'] = cm
    df['vn'] = vn
    df['cn'] = cn
    df['nm'] = idxmin_n - idxmin_m
    
else:
    idxmin = df['邻域'].idxmin()
    
    
    vn = df.loc[idxmin,'volume']
    cn = df.loc[idxmin,'close']
    
    df['vm'] = vs
    df['cm'] = cs
    df['vn'] = vn
    df['cn'] = cn
    df['nm'] = idxmin
    



    
df['factor']  = (df['cn'] - df['cm'])/df['cm']/df['nm']
In [27]:
df
Out[27]:
instrument date open close low high amount volume 邻域 vm cm vn cn nm factor
0 002362.SZA 2022-01-04 09:35:00 17.670000 17.730000 17.650000 17.730000 1862097.0 105400 2234202.0 883802 17.940001 105400 17.73 0 -inf
1 002362.SZA 2022-01-04 09:36:00 17.709999 17.889999 17.709999 17.889999 2150419.0 120600 1435300.0 883802 17.940001 105400 17.73 0 -inf
2 002362.SZA 2022-01-04 09:37:00 17.879999 17.930000 17.879999 17.930000 3726458.0 208300 1516783.0 883802 17.940001 105400 17.73 0 -inf
3 002362.SZA 2022-01-04 09:38:00 17.930000 18.000000 17.910000 18.000000 3662693.0 203800 1524383.0 883802 17.940001 105400 17.73 0 -inf
4 002362.SZA 2022-01-04 09:39:00 18.000000 18.000000 18.000000 18.040001 2977605.0 165300 1395183.0 883802 17.940001 105400 17.73 0 -inf
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
227 002362.SZA 2022-01-04 14:52:00 17.930000 17.930000 17.910000 17.940001 509059.0 28400 414500.0 883802 17.940001 105400 17.73 0 -inf
228 002362.SZA 2022-01-04 14:53:00 17.930000 17.940001 17.920000 17.940001 503844.0 28100 446300.0 883802 17.940001 105400 17.73 0 -inf
229 002362.SZA 2022-01-04 14:54:00 17.950001 17.959999 17.950001 17.980000 1011225.0 56300 430800.0 883802 17.940001 105400 17.73 0 -inf
230 002362.SZA 2022-01-04 14:55:00 17.950001 17.940001 17.940001 17.959999 1269131.0 70700 345100.0 883802 17.940001 105400 17.73 0 -inf
231 002362.SZA 2022-01-04 14:56:00 17.930000 17.920000 17.920000 17.950001 1007723.0 56200 604900.0 883802 17.940001 105400 17.73 0 -inf

232 rows × 15 columns

In [15]:
df
Out[15]:
instrument date open close low high amount volume
0 002362.SZA 2022-01-04 09:31:00 17.940001 17.940001 17.879999 18.290001 15909196.0 883802
1 002362.SZA 2022-01-04 09:32:00 17.910000 17.820000 17.820000 17.940001 3491968.0 195000
2 002362.SZA 2022-01-04 09:33:00 17.820000 17.700001 17.620001 17.820000 2648165.0 149300
3 002362.SZA 2022-01-04 09:34:00 17.700001 17.680000 17.610001 17.719999 3584689.0 202700
4 002362.SZA 2022-01-04 09:35:00 17.670000 17.730000 17.650000 17.730000 1862097.0 105400
... ... ... ... ... ... ... ... ...
77755 002362.SZA 2023-05-09 14:56:00 31.840000 31.900000 31.840000 31.900000 16287989.0 510800
77756 002362.SZA 2023-05-09 14:57:00 31.889999 31.690001 31.690001 31.900000 18603670.0 584700
77757 002362.SZA 2023-05-09 14:58:00 31.680000 31.680000 31.680000 31.680000 820913.0 25900
77758 002362.SZA 2023-05-09 14:59:00 31.680000 31.680000 31.680000 31.680000 0.0 0
77759 002362.SZA 2023-05-09 15:00:00 31.690001 31.690001 31.690001 31.690001 20950259.0 661100

77760 rows × 8 columns