# stock_prediction.py
from google.colab import drive
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LSTM
# Mount Google Drive so the price CSV and the saved weights file are reachable.
drive.mount('/content/drive')

# Price history CSV; the file is read with the CP949 encoding.
df_price = pd.read_csv('/content/drive/My Drive/Colab Notebooks/삼성바이오로직스.csv', encoding='cp949')

# Parse the YYYYMMDD '날짜' (date) column once, then derive the
# '연도' (year), '월' (month) and '일' (day) helper columns from it.
df_price['날짜'] = pd.to_datetime(df_price['날짜'], format='%Y%m%d')
df_price['연도'] = df_price['날짜'].dt.year
df_price['월'] = df_price['날짜'].dt.month
df_price['일'] = df_price['날짜'].dt.day

# Keep only rows dated 1990 or later.
df = df_price.loc[df_price['연도'] >= 1990]

# Scale open / high / low / close / volume into [0, 1].
# NOTE(review): the scaler is fitted on all rows, including the ones later
# held out as the test set — confirm this leakage is acceptable.
scaler = MinMaxScaler()
scale_cols = ['시가', '고가', '저가', '종가', '거래량']
# Building the frame from the bare array gives a fresh 0..n-1 index.
df_scaled = pd.DataFrame(scaler.fit_transform(df[scale_cols]), columns=scale_cols)

# Number of trailing rows reserved for testing.
# NOTE(review): assumes the CSV rows are in chronological order — TODO confirm.
TEST_SIZE = 200
# Number of consecutive rows fed to the model per sample.
window_size = 20
def make_dataset(data, label, window_size=20):
    """Cut row-ordered data into sliding windows with next-row targets.

    For every start position ``i`` the feature sample is rows
    ``i .. i + window_size - 1`` of ``data`` and its target is row
    ``i + window_size`` of ``label``.

    Args:
        data: Feature rows. A pandas DataFrame/Series or anything
            ``np.asarray`` accepts (ndarray, nested list).
        label: Target rows, aligned positionally with ``data``.
        window_size: Number of consecutive rows in each feature sample.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays holding
        ``len(data) - window_size`` samples each. For 2-D ``data`` the
        features array has shape ``(samples, window_size, columns)``.
        Both arrays are empty when ``data`` has no more than
        ``window_size`` rows.
    """
    # Convert once up front: positional NumPy slicing is much cheaper than
    # calling ``.iloc`` on every iteration, and it lets plain arrays through.
    data_values = np.asarray(data)
    label_values = np.asarray(label)

    sample_count = len(data_values) - window_size
    features = [data_values[i:i + window_size] for i in range(sample_count)]
    labels = [label_values[i + window_size] for i in range(sample_count)]
    return np.array(features), np.array(labels)
def main():
    """Build the windowed datasets, obtain LSTM weights, and plot predictions.

    The last ``TEST_SIZE`` rows of ``df_scaled`` are held out for testing and
    the rest is used for training. If the checkpoint file already exists its
    weights are loaded as-is; otherwise the model is trained first (early
    stopping, best-only checkpointing) and the saved checkpoint is then
    loaded. Finally the held-out close prices are plotted against the
    model's predictions.
    """
    import os

    from sklearn.model_selection import train_test_split

    # Inputs: open / high / low / volume. Target: close.
    feature_cols = ['시가', '고가', '저가', '거래량']
    label_cols = ['종가']

    train = df_scaled[:-TEST_SIZE]
    test = df_scaled[-TEST_SIZE:]

    # Training windows.
    train_feature, train_label = make_dataset(
        train[feature_cols], train[label_cols], window_size
    )

    # Test windows (the data actually being predicted).
    test_feature, test_label = make_dataset(
        test[feature_cols], test[label_cols], window_size
    )

    model = Sequential()
    model.add(
        LSTM(
            16,
            input_shape=(train_feature.shape[1], train_feature.shape[2]),
            activation='relu',
            return_sequences=False,
        )
    )
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    filename = '/content/drive/My Drive/Colab Notebooks/tmp_samba.h5'

    # Train only when no checkpoint is available, so a fresh environment can
    # run end to end instead of failing inside load_weights.
    if not os.path.exists(filename):
        # Create the train / validation split.
        # NOTE(review): train_test_split shuffles by default, so validation
        # windows may overlap training windows in time — confirm intended.
        x_train, x_valid, y_train, y_valid = train_test_split(
            train_feature, train_label, test_size=0.2
        )
        early_stop = EarlyStopping(monitor='val_loss', patience=10)
        checkpoint = ModelCheckpoint(
            filename,
            monitor='val_loss',
            verbose=1,
            save_best_only=True,
            mode='auto',
        )
        model.fit(
            x_train,
            y_train,
            epochs=200,
            batch_size=16,
            validation_data=(x_valid, y_valid),
            callbacks=[early_stop, checkpoint],
        )

    # Load the checkpointed weights.
    model.load_weights(filename)
    model.summary()

    # Predict on the held-out windows and compare with the actual values.
    pred = model.predict(test_feature)

    plt.figure(figsize=(12, 9))
    plt.plot(test_label, label='actual')
    plt.plot(pred, label='prediction')
    plt.legend()
    plt.show()
# Run the pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()