克隆策略
In [246]:
import numpy as np
import pandas as pd
In [247]:
# Fetch the list of stock codes for market 'HK_STOCK'
# (start_date '2005-01-01', no end date).
inst = D.instruments(
    start_date='2005-01-01',
    end_date=None,
    market='HK_STOCK',
)
len(inst)  # not the last expression of the cell, so this value is not displayed
inst[:5]
Out[247]:
['0001.HKEX', '0002.HKEX', '0003.HKEX', '0004.HKEX', '0005.HKEX']
In [248]:
# Fetch daily close prices for every instrument and assemble one wide frame
# (rows = dates, columns = instrument codes).
#
# Each instrument's closes are indexed by that instrument's own 'date' column
# before being combined, so pd.concat lines the series up on the date itself.
# Assigning the raw 'close' columns straight into a frame would align them on
# the row labels of each query result and attach only the first instrument's
# dates, so instruments with a different trading calendar (late listings,
# suspensions) could end up truncated or with prices on the wrong dates.
close_by_instrument = {}
for ins in inst:
    datas = D.history_data(instruments=ins, start_date='2014-01-01', end_date='2018-01-01',
                           frequency='daily', fields='close')
    # Skip instruments for which nothing usable came back.
    if datas is None or datas.empty:
        continue
    if 'close' not in datas.keys() or 'date' not in datas.keys():
        continue
    closes = datas.set_index('date')['close']
    # concat cannot align an index with repeated labels; keep the last row per date.
    closes = closes[~closes.index.duplicated(keep='last')]
    close_by_instrument[ins] = closes

if close_by_instrument:
    # A single concat builds all columns at once instead of inserting ~2000
    # columns one by one into a growing frame.
    df = pd.concat(close_by_instrument, axis=1).sort_index()
else:
    # No instrument returned data: keep an empty frame rather than failing.
    df = pd.DataFrame({})
df.index.name = 'Date'
df.head()
Out[248]:
0001.HKEX 0002.HKEX 0003.HKEX 0004.HKEX 0005.HKEX 0006.HKEX 0007.HKEX 0008.HKEX 0009.HKEX 0010.HKEX ... 8480.HKEX 8481.HKEX 8485.HKEX 8491.HKEX 8495.HKEX 8559.HKEX 8587.HKEX 8590.HKEX 8591.HKEX 8593.HKEX
Date
2014-01-02 230.210571 263.047974 660.567871 149.237625 664.553894 214.540298 4.712500 22.975527 0.091450 106.948311 ... 0.64 NaN NaN 0.51 0.550 NaN NaN NaN NaN NaN
2014-01-03 225.898819 257.915344 646.481689 145.732040 651.592712 211.068771 4.793750 22.776318 0.091450 104.646896 ... 0.67 NaN 0.32 0.51 0.530 NaN NaN 0.077 NaN NaN
2014-01-06 225.523880 257.273773 644.257568 143.353256 653.163757 207.944397 4.976562 22.709913 0.088500 103.970009 ... 0.64 NaN 0.52 0.58 0.470 NaN NaN 0.068 0.166058 6.3
2014-01-07 227.586029 256.846039 639.809326 143.854050 659.055176 205.167175 4.996875 22.975527 0.085550 103.699249 ... 0.74 NaN NaN 0.72 0.495 NaN NaN 0.065 0.160769 6.3
2014-01-08 228.148422 258.556915 638.326538 146.107635 670.838074 206.035049 4.976562 23.506752 0.086533 104.511513 ... 0.67 NaN NaN 0.82 0.480 NaN NaN 0.064 0.162885 6.3

5 rows × 2183 columns

In [249]:
# Collect the instruments whose price column has at least one missing value
# over the period.
row = df.shape[0]  # number of rows in the frame; kept in case later cells reference it
# One vectorised pass over the whole frame instead of building a value_counts
# DataFrame for every column.
empty = df.columns[df.isnull().any()].tolist()
In [250]:
# Remove the instruments that have missing values.
# drop() returns a new frame, and errors='ignore' skips labels that are already
# gone, so re-running this cell does not raise KeyError the way a pop() loop does.
df = df.drop(empty, axis=1, errors='ignore')
df.shape
Out[250]:
(984, 1608)
In [251]:
# Write the whole price table to a single CSV file.
df.to_csv(path_or_buf='HK_stock_Datas.csv')
In [252]:
# The site limits the size of a downloadable file, so also write the table as
# several smaller files holding at most COLUMNS_PER_FILE instruments each:
# tmp0.csv, tmp1.csv, ...
COLUMNS_PER_FILE = 100
for file_no, begin in enumerate(range(0, df.shape[1], COLUMNS_PER_FILE)):
    # Positional slicing clamps at the last column, so the final (possibly
    # shorter) chunk needs no explicit end bound.
    tmp_df = df.iloc[:, begin:begin + COLUMNS_PER_FILE]
    tmp_df.to_csv('tmp' + str(file_no) + '.csv')