import numpy as np
import pandas as pd
# Fetch the list of stock codes for the HK_STOCK market, starting from
# 2005-01-01 with no end date given.
# NOTE(review): `D` is not defined in the visible source; presumably it is
# supplied by the runtime environment -- confirm.
inst = D.instruments(
    start_date='2005-01-01',
    end_date=None,
    market='HK_STOCK',
)
# Bare expressions for interactive inspection: how many codes were returned,
# and what the first five look like.
len(inst)
inst[:5]
# Fetch daily close prices (2014-01-01 to 2018-01-01) for every instrument
# and assemble them into one table: one row per date, one column per
# instrument, indexed by 'Date'.
#
# Each close series is keyed by its own `date` column and then reindexed onto
# the dates of the first instrument that returned data. Assigning the raw
# `close` columns straight into a shared frame would match rows by each
# result's own index rather than by date, so instruments with differing
# trading calendars could end up on the wrong rows.
# NOTE(review): assumes dates are unique within a single instrument's result
# (reindex requires a unique source index) -- confirm against D.history_data.
close_by_instrument = {}
reference_dates = None
for ins in inst:
    datas = D.history_data(
        instruments=ins,
        start_date='2014-01-01',
        end_date='2018-01-01',
        frequency='daily',
        fields='close',
    )
    # Skip instruments for which nothing usable came back.
    if datas is None or datas.empty:
        continue
    if 'close' not in datas.columns or 'date' not in datas.columns:
        continue

    closes = datas.set_index('date')['close']
    if reference_dates is None:
        # The first instrument with data defines the row index of the table.
        reference_dates = closes.index
    # Dates missing for this instrument become NaN; extra dates are dropped.
    close_by_instrument[ins] = closes.reindex(reference_dates)

# Build the frame in one step instead of inserting columns one at a time.
# With no data at all this yields an empty frame rather than a KeyError.
df = pd.DataFrame(close_by_instrument).rename_axis('Date')
df.head()
# Find the instruments whose column has at least one missing value over the
# period. A single vectorised null check replaces building a value_counts
# frame per column.
has_null = df.isnull().any(axis=0)
empty = [col for col, flagged in zip(df.columns, has_null) if flagged]

# Remove those instruments in place, leaving only fully populated columns.
df.drop(columns=empty, inplace=True)
df.shape
# Write the whole cleaned table to a single CSV file.
df.to_csv('HK_stock_Datas.csv')

# The website limits the size of downloadable files, so also write the table
# out in pieces of at most 100 columns each: tmp0.csv, tmp1.csv, ...
n_cols = df.shape[1]
for chunk_no, begin in enumerate(range(0, n_cols, 100)):
    end = min(begin + 100, n_cols)
    tmp_df = df.loc[:, df.columns[begin:end]]
    tmp_df.to_csv('tmp' + str(chunk_no) + '.csv')