克隆策略
In [1]:
from userlib import no
In [2]:
# Universe construction: fetch listing dates as of 2017-07-03, keep only
# stocks listed before 2008 (long trading history), then draw a fixed,
# reproducible sample of 80 instruments.
wholeinstruments = D.history_data(D.instruments(start_date='2017-07-03', end_date='2017-07-03') , start_date='2017-07-03', end_date='2017-07-03', fields=['list_date'])
oldinstruments = wholeinstruments[wholeinstruments.list_date < '2008-01-01']
# BUG FIX: was `numpy.random.seed(111)` — the notebook only uses the `np`
# alias (see np.random.choice below), so the bare `numpy` name may not exist
# on a fresh kernel. Use the alias consistently; seed keeps sampling fixed.
np.random.seed(111)
instruments = np.random.choice(oldinstruments.instrument, size=80, replace=False)
In [3]:
# Train/test boundary: samples dated strictly before this go to the training
# set, the rest form the out-of-sample test set.
split_date = datetime.datetime.strptime('2016-06-01', '%Y-%m-%d')

# Accumulators filled per-instrument by the loop below.
x_train, train_y = [], []
x_test, test_y = [], []
train_input, train_output = [], []
test_input, test_output = [], []
# --------------------------------------------------------------------------
# Fundamental features: financial-statement fields for every sampled stock,
# fetched once for the full 2008-01-01 .. 2016-10-01 window.
data_ = D.features(instruments, start_date='2008-01-01', end_date='2016-10-01', 
           fields=['fs_operating_revenue_yoy_0','amount_0',
                   'close_0','fs_deducted_profit_0','fs_net_profit_0','market_cap_float_0',
                   'fs_common_equity_0','fs_cash_equivalents_0','fs_account_receivable_0','fs_fixed_assets_0','fs_proj_matl_0',
                   
                   'fs_construction_in_process_0','fs_fixed_assets_disp_0','fs_account_payable_0', 'fs_total_liability_0',
                   'fs_paicl_up_capital_0','fs_capital_reserves_0','fs_surplus_reserves_0', 'fs_undistributed_profit_0',
                   'fs_eqy_belongto_parcomsh_0',
                   
                   'fs_total_equity_0','fs_gross_revenues_0', 'fs_total_operating_costs_0','fs_selling_expenses_0',
                   'fs_financial_expenses_0',
                   'fs_general_expenses_0', 'fs_operating_profit_0','fs_total_profit_0','fs_income_tax_0',
                   
                   'fs_net_income_0', 'fs_cash_ratio_0','fs_non_current_liabilities_0','fs_current_liabilities_0',
                   'fs_non_current_assets_0',
                   'fs_current_assets_0','fs_net_cash_flow_0','fs_free_cash_flow_0','fs_operating_revenue_0'])

# Market benchmark series: SSE Composite (000001.SHA) daily close/amount/turn.
p_data = D.history_data(['000001.SHA'], '2013-09-01', '2016-10-01', ['close', 'amount','turn'])
# Reverse the row order, drop missing rows, and renumber positions.
# NOTE(review): downstream code aligns frames purely by row position, so this
# exact sort/dropna/reset sequence must stay identical for every frame —
# presumably this yields chronological order; confirm the platform's default.
p_data=p_data.sort_index(ascending=False)
p_data.dropna(inplace=True)
p_data.reset_index(drop=True, inplace=True)
# Per-stock daily price data for all sampled instruments (filtered per stock
# inside the loop below).
data   = D.history_data(instruments, '2013-09-01', '2016-10-01', ['close', 'amount','turn'])
# --------------------------------------------------------------------------
# Build training/test samples per instrument.  For each stock:
#   1. look up its Shenwan level-1 industry code and fetch the corresponding
#      industry-index price series,
#   2. align market (p_data), industry (h_data) and stock (data1) series by
#      row position after the identical sort/dropna/reset treatment,
#   3. let userlib.no.train_data turn fundamentals (data0) + prices (data1)
#      into (x, y, date) samples, then split on split_date.
for i in range(len(instruments)):
    instrument = instruments[i]
    print(instrument )
    # Industry classification as observed on 2016-09-30.
    mb=D.history_data(instrument, '2016-09-30', '2016-09-30',['industry_sw_level1'])
    mdd=str(mb.industry_sw_level1[0])
    # Industry index ticker, e.g. 'SW801010.SHA'.
    Str='SW'+mdd+'.SHA'
    h_data = D.history_data([Str], '2013-09-01', '2016-10-01', ['close', 'amount','turn'])
    h_data=h_data.sort_index(ascending=False)
    h_data.dropna(inplace=True)
    h_data.reset_index(drop=True, inplace=True)
    #print(h_data)
    # Stock's own daily series, processed identically to p_data/h_data.
    data1 = data[data.instrument==instrument]
    data1=data1.sort_index(ascending=False)
    data1.dropna(inplace=True)
    data1.reset_index(drop=True, inplace=True)
    # Attach market- and industry-level columns by ROW POSITION.
    # NOTE(review): this assumes all three frames cover exactly the same
    # trading days after dropna; any mismatch silently misaligns features.
    data1['CLOSE']=p_data['close']
    data1['AMOUNT']=p_data['amount']
    data1['TURN']=p_data['turn']
    data1['h_close']=h_data['close']
    data1['h_amount']=h_data['amount']
    data1['h_turn']=h_data['turn']  
    # Fundamental features for this stock; missing values filled with a small
    # constant BEFORE dropna, so dropna removes nothing here.
    data0 = data_[data_.instrument==instrument]
    data0=data0.fillna(value=0.0001)
    data0=data0.sort_index(ascending=False)
    data0.dropna(inplace=True)
    data0.reset_index(drop=True, inplace=True)
    # train_data returns samples x, targets y and their dates z
    # (future_len=60: presumably a 60-day prediction horizon — confirm in
    # userlib.no).
    x, y, z = no.train_data(data0,data1, future_len=60)
    x_train =x_train + list(x[z<split_date])
    train_y =train_y+ list(y[z<split_date])
    #print(np.array(x_train).shape)
    x_test =x_test+  list(x[z>=split_date])
    test_y = test_y+ list(y[z>=split_date])
    # Release the large per-stock sample array before the next iteration.
    x=[]
    # Progress marker every second instrument.
    if (i+1) % 2== 0:
        print(i)
600405.SHA
000951.SZA
1
600641.SHA
002125.SZA
3
000016.SZA
600865.SHA
5
000830.SZA
600512.SHA
7
000062.SZA
600806.SHA
9
600696.SHA
600015.SHA
11
600432.SHA
002110.SZA
13
002171.SZA
600362.SHA
15
600821.SHA
000801.SZA
17
002117.SZA
000792.SZA
19
600600.SHA
600426.SHA
21
000785.SZA
600900.SHA
23
600478.SHA
600726.SHA
25
000758.SZA
600870.SHA
27
002009.SZA
600697.SHA
29
002067.SZA
600365.SHA
31
000736.SZA
002121.SZA
33
600490.SHA
002192.SZA
35
000409.SZA
600198.SHA
37
002001.SZA
002034.SZA
39
600649.SHA
002045.SZA
41
600837.SHA
000090.SZA
43
000408.SZA
600698.SHA
45
600859.SHA
002008.SZA
47
600970.SHA
000902.SZA
49
600642.SHA
000949.SZA
51
600714.SHA
600687.SHA
53
600895.SHA
000825.SZA
55
600800.SHA
000046.SZA
57
600723.SHA
600817.SHA
59
600052.SHA
600733.SHA
61
600590.SHA
000929.SZA
63
600058.SHA
600834.SHA
65
600704.SHA
600689.SHA
67
000401.SZA
600338.SHA
69
600299.SHA
000407.SZA
71
000539.SZA
002081.SZA
73
600081.SHA
000790.SZA
75
000430.SZA
000501.SZA
77
600825.SHA
000897.SZA
79
In [4]:
# Materialize the accumulated lists as numpy arrays, clearing each source
# list right after conversion to halve peak memory usage.
X_train=np.array(x_train)
In [5]:
x_train = []
In [6]:
X_test = np.array(x_test) 
In [7]:
x_test = []
In [8]:
y_train =np.array(train_y)
In [9]:
y_test =np.array(test_y)
In [10]:
def shuffle_data(data_x, data_y):
    """Shuffle two parallel arrays with a single shared permutation.

    Both inputs must be numpy-indexable and of equal length; the returned
    pair is reordered identically, so (x[i], y[i]) correspondences are
    preserved. Uses the global numpy RNG state.
    """
    n = len(data_x)
    order = np.random.choice(n, size=n, replace=False)
    return data_x[order], data_y[order]
X_train, y_train = shuffle_data(X_train, y_train )
In [11]:
from userlib import note
Using TensorFlow backend.
In [12]:
model=note.train_model()
In [13]:
model.fit(X_train, y_train, batch_size=64, epochs=24, verbose=2, validation_split=0.1, shuffle=True)
Train on 39163 samples, validate on 4352 samples
Epoch 1/24
217s - loss: 0.1047 - mean_absolute_error: 0.2391 - val_loss: 0.0868 - val_mean_absolute_error: 0.2548
Epoch 2/24
210s - loss: 0.0213 - mean_absolute_error: 0.1104 - val_loss: 0.0149 - val_mean_absolute_error: 0.0915
Epoch 3/24
209s - loss: 0.0127 - mean_absolute_error: 0.0836 - val_loss: 0.0156 - val_mean_absolute_error: 0.0935
Epoch 4/24
212s - loss: 0.0093 - mean_absolute_error: 0.0698 - val_loss: 0.0108 - val_mean_absolute_error: 0.0788
Epoch 5/24
209s - loss: 0.0072 - mean_absolute_error: 0.0605 - val_loss: 0.0086 - val_mean_absolute_error: 0.0657
Epoch 6/24
210s - loss: 0.0062 - mean_absolute_error: 0.0551 - val_loss: 0.0058 - val_mean_absolute_error: 0.0552
Epoch 7/24
209s - loss: 0.0053 - mean_absolute_error: 0.0504 - val_loss: 0.0049 - val_mean_absolute_error: 0.0487
Epoch 8/24
209s - loss: 0.0046 - mean_absolute_error: 0.0466 - val_loss: 0.0058 - val_mean_absolute_error: 0.0499
Epoch 9/24
211s - loss: 0.0043 - mean_absolute_error: 0.0441 - val_loss: 0.0048 - val_mean_absolute_error: 0.0457
Epoch 10/24
211s - loss: 0.0038 - mean_absolute_error: 0.0420 - val_loss: 0.0053 - val_mean_absolute_error: 0.0524
Epoch 11/24
222s - loss: 0.0035 - mean_absolute_error: 0.0405 - val_loss: 0.0040 - val_mean_absolute_error: 0.0441
Epoch 12/24
243s - loss: 0.0033 - mean_absolute_error: 0.0386 - val_loss: 0.0039 - val_mean_absolute_error: 0.0405
Epoch 13/24
211s - loss: 0.0030 - mean_absolute_error: 0.0375 - val_loss: 0.0033 - val_mean_absolute_error: 0.0367
Epoch 14/24
211s - loss: 0.0028 - mean_absolute_error: 0.0360 - val_loss: 0.0035 - val_mean_absolute_error: 0.0353
Epoch 15/24
212s - loss: 0.0025 - mean_absolute_error: 0.0344 - val_loss: 0.0035 - val_mean_absolute_error: 0.0404
Epoch 16/24
213s - loss: 0.0024 - mean_absolute_error: 0.0338 - val_loss: 0.0028 - val_mean_absolute_error: 0.0331
Epoch 17/24
210s - loss: 0.0023 - mean_absolute_error: 0.0326 - val_loss: 0.0035 - val_mean_absolute_error: 0.0383
Epoch 18/24
209s - loss: 0.0022 - mean_absolute_error: 0.0317 - val_loss: 0.0023 - val_mean_absolute_error: 0.0321
Epoch 19/24
211s - loss: 0.0021 - mean_absolute_error: 0.0313 - val_loss: 0.0028 - val_mean_absolute_error: 0.0348
Epoch 20/24
212s - loss: 0.0019 - mean_absolute_error: 0.0305 - val_loss: 0.0034 - val_mean_absolute_error: 0.0399
Epoch 21/24
214s - loss: 0.0019 - mean_absolute_error: 0.0298 - val_loss: 0.0028 - val_mean_absolute_error: 0.0334
Epoch 22/24
247s - loss: 0.0019 - mean_absolute_error: 0.0290 - val_loss: 0.0027 - val_mean_absolute_error: 0.0312
Epoch 23/24
218s - loss: 0.0018 - mean_absolute_error: 0.0286 - val_loss: 0.0037 - val_mean_absolute_error: 0.0382
Epoch 24/24
223s - loss: 0.0016 - mean_absolute_error: 0.0282 - val_loss: 0.0020 - val_mean_absolute_error: 0.0281
Out[13]:
<keras.callbacks.History at 0x7f24a752da20>
In [14]:
model.evaluate(X_test, y_test, batch_size=32, verbose=0)
Out[14]:
[0.02084387504565182, 0.097712876565420811]
In [15]:
# Predict on the full test set; shape is (n_test_samples, 1).
predictions = model.predict(X_test)
print(predictions.shape)
(1675, 1)
In [16]:
import matplotlib.pyplot as plt

# Compare predicted vs. actual targets on a slice of the test set.
# Improvement: explicit axes, labels, title and legend so the two
# overlapping series are distinguishable and the figure stands alone.
m = 0
n = 820
fig, ax = plt.subplots()
ax.plot(predictions[m:n], label='predicted')
ax.plot(test_y[m:n], label='actual')
ax.set(title='Predicted vs. actual (test samples %d-%d)' % (m, n - 1),
       xlabel='test sample index', ylabel='target value')
ax.legend();
Out[16]:
[<matplotlib.lines.Line2D at 0x7f24b9c57f60>]