최재은

Add: Event Embedding code & data, revise final report

Showing 41 changed files with 4105 additions and 0 deletions
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "stock-prediction.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "9EBLJGRkA7au",
"colab_type": "code",
"outputId": "6aa769f2-a86f-463c-893c-7a8b0b3aca08",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 122
}
},
"source": [
"from google.colab import auth\n",
"auth.authenticate_user()\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/gdrive')"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n",
"\n",
"Enter your authorization code:\n",
"··········\n",
"Mounted at /content/gdrive\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "xz6TIi8x-3wI",
"colab_type": "code",
"outputId": "47fb88a4-fa0e-4327-b5d0-9ab7f42041f2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"%tensorflow_version 1.x"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"TensorFlow 1.x selected.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ykRUwvkvIoKH",
"colab_type": "code",
"outputId": "6d116660-4a7f-4c85-a733-6956210958c9",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 68
}
},
"source": [
"!cd gdrive/'My Drive'/'capstone 2' && ls"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"main.go\t\t preprocessed_data.gsheet title.csv Word2vec.model\n",
"metadata.tsv\t title2020.csv\t\t title.gsheet\n",
"preprocessed_data.csv title2.csv\t\t word2vec\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "C-V3pgCEX5xR",
"colab_type": "text"
},
"source": [
"### 데이터 전처리 \n",
"[~ 포토 \\~]가 들어간 기사 제목은 데이터 리스트에서 삭제 하고 [\\~]가 들어간 기사 제목은 [\\~] 삭제\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "oZ2Q2_uWViO3",
"colab_type": "code",
"outputId": "0fc44251-8920-4e1a-9472-5603d0ce1264",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 785
}
},
"source": [
"!pip3 install hanja==0.13.0\n",
"!pip3 install git+https://github.com/haven-jeon/PyKoSpacing.git"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"Requirement already satisfied: hanja==0.13.0 in /usr/local/lib/python3.6/dist-packages (0.13.0)\n",
"Collecting git+https://github.com/haven-jeon/PyKoSpacing.git\n",
" Cloning https://github.com/haven-jeon/PyKoSpacing.git to /tmp/pip-req-build-n_sau7zy\n",
" Running command git clone -q https://github.com/haven-jeon/PyKoSpacing.git /tmp/pip-req-build-n_sau7zy\n",
"Requirement already satisfied (use --upgrade to upgrade): pykospacing==0.1 from git+https://github.com/haven-jeon/PyKoSpacing.git in /usr/local/lib/python3.6/dist-packages\n",
"Collecting tensorflow<=1.6.0,>=1.4.0\n",
" Using cached https://files.pythonhosted.org/packages/d9/0f/fbd8bb92459c75db93040f80702ebe4ba83a52cdb6ad930654c31dc0b711/tensorflow-1.6.0-cp36-cp36m-manylinux1_x86_64.whl\n",
"Requirement already satisfied: keras>=2.1.5 in /usr/local/lib/python3.6/dist-packages (from pykospacing==0.1) (2.2.5)\n",
"Requirement already satisfied: h5py>=2.7.1 in /usr/local/lib/python3.6/dist-packages (from pykospacing==0.1) (2.10.0)\n",
"Requirement already satisfied: argparse>=1.4.0 in /usr/local/lib/python3.6/dist-packages (from pykospacing==0.1) (1.4.0)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (1.12.0)\n",
"Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (0.34.2)\n",
"Requirement already satisfied: astor>=0.6.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (0.8.1)\n",
"Requirement already satisfied: numpy>=1.13.3 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (1.18.2)\n",
"Collecting tensorboard<1.7.0,>=1.6.0\n",
" Using cached https://files.pythonhosted.org/packages/b0/67/a8c91665987d359211dcdca5c8b2a7c1e0876eb0702a4383c1e4ff76228d/tensorboard-1.6.0-py3-none-any.whl\n",
"Requirement already satisfied: protobuf>=3.4.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (3.10.0)\n",
"Requirement already satisfied: termcolor>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (1.1.0)\n",
"Requirement already satisfied: grpcio>=1.8.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (1.27.2)\n",
"Requirement already satisfied: gast>=0.2.0 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (0.3.3)\n",
"Requirement already satisfied: absl-py>=0.1.6 in /usr/local/lib/python3.6/dist-packages (from tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (0.9.0)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.6/dist-packages (from keras>=2.1.5->pykospacing==0.1) (3.13)\n",
"Requirement already satisfied: scipy>=0.14 in /usr/local/lib/python3.6/dist-packages (from keras>=2.1.5->pykospacing==0.1) (1.4.1)\n",
"Requirement already satisfied: keras-applications>=1.0.8 in /usr/local/lib/python3.6/dist-packages (from keras>=2.1.5->pykospacing==0.1) (1.0.8)\n",
"Requirement already satisfied: keras-preprocessing>=1.1.0 in /usr/local/lib/python3.6/dist-packages (from keras>=2.1.5->pykospacing==0.1) (1.1.0)\n",
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (3.2.1)\n",
"Requirement already satisfied: html5lib==0.9999999 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (0.9999999)\n",
"Requirement already satisfied: bleach==1.5.0 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (1.5.0)\n",
"Requirement already satisfied: werkzeug>=0.11.10 in /usr/local/lib/python3.6/dist-packages (from tensorboard<1.7.0,>=1.6.0->tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (1.0.0)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from protobuf>=3.4.0->tensorflow<=1.6.0,>=1.4.0->pykospacing==0.1) (46.0.0)\n",
"Building wheels for collected packages: pykospacing\n",
" Building wheel for pykospacing (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pykospacing: filename=pykospacing-0.1-cp36-none-any.whl size=2255598 sha256=249ac07d0d8b26e4b9d4a1821995b953f9f90c6206cb378f183ff2be5001b607\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-th23h_qr/wheels/4d/45/58/e26cb2b7f6a063d234158c6fd1e5700f6e15b99d67154340ba\n",
"Successfully built pykospacing\n",
"\u001b[31mERROR: magenta 0.3.19 has requirement tensorflow>=1.12.0, but you'll have tensorflow 1.6.0 which is incompatible.\u001b[0m\n",
"Installing collected packages: tensorboard, tensorflow\n",
" Found existing installation: tensorboard 2.2.0\n",
" Uninstalling tensorboard-2.2.0:\n",
" Successfully uninstalled tensorboard-2.2.0\n",
" Found existing installation: tensorflow 2.2.0rc2\n",
" Uninstalling tensorflow-2.2.0rc2:\n",
" Successfully uninstalled tensorflow-2.2.0rc2\n",
"Successfully installed tensorboard-1.6.0 tensorflow-1.6.0\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "Ddf2Fp7-JAYI",
"colab_type": "code",
"outputId": "0d3029dd-6db6-41eb-8142-24d42bd516b6",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"import hanja\n",
"import pandas as pd\n",
"import re\n",
"from pykospacing import spacing\n",
"\n",
"news_list = pd.read_csv('gdrive/My Drive/capstone 2/title.csv', encoding='utf-8')\n",
"print(len(news_list))\n",
"photo_regexp = \"\\[.*포토.*\\]\"\n",
"brac_regexp = \"\\[.*\\]\"\n",
"spechar_with_regexp = \"[^가-힣ㄱ-ㅎㅏ-ㅣa-zA-Z0-9\\s\\+\\.\\-]\"\n",
"spechar_without_regexp = \"[^가-힣ㄱ-ㅎㅏ-ㅣa-zA-Z0-9\\s\\.]\"\n",
"percentage = \"(\\+\\d*\\.\\d*)|(\\-\\d*\\.\\d*)\"\n",
"is_exist_regexp = \"[가-힣ㄱ-ㅎㅏ-ㅣa-zA-Z0-9\\s\\.]\"\n",
"\n",
"\n",
"for i, title in enumerate(news_list['title']):\n",
" if re.search(photo_regexp,title):\n",
" news_list.drop(i, inplace=True)\n",
" else :\n",
" if \"\" in title:\n",
" title = title.replace(\"\",\"상승\")\n",
" if \"\" in title:\n",
" title = title.replace(\"\",\"하락\")\n",
"\n",
" title = hanja.translate(title, 'substitution')\n",
" title = re.sub(brac_regexp, '', title)\n",
" title = re.sub(\"\\.{3}|\\.{2}\", '', title)\n",
"\n",
" if re.search(percentage,title):\n",
" title = re.sub(spechar_with_regexp, '',title).lstrip()\n",
" else:\n",
" title = re.sub(spechar_without_regexp, '', title).lstrip()\n",
" \n",
" if ( not re.search(is_exist_regexp,title) ) or (len(title) == 0):\n",
" news_list.drop(i, inplace=True)\n",
" \n",
" news_list[\"title\"][i] = spacing(title)\n",
" \n",
"\n",
"\n",
"df = pd.DataFrame(news_list, columns=['title','date','publication'])\n",
"df.to_csv('gdrive/My Drive/capstone 2/without_percentage_preprocessed_data.csv',sep=',',encoding='UTF-8',index=False) \n"
],
"execution_count": 0,
"outputs": [
{
"output_type": "stream",
"text": [
"100960\n"
],
"name": "stdout"
}
]
}
]
}
\ No newline at end of file
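
The preprocessing cell above drops photo-gallery headlines outright and strips other bracketed tags before hanja substitution and spacing correction. A minimal sketch of how the two bracket regexes behave, using only the standard library; the headlines below are made-up examples, not rows from title.csv:

import re

photo_regexp = r"\[.*포토.*\]"
brac_regexp = r"\[.*\]"

title_a = "[포토] 코스피 장중 모습"        # hypothetical photo-gallery headline
title_b = "[특징주] 삼성전자 +1.5% 강세"    # hypothetical tagged headline

print(bool(re.search(photo_regexp, title_a)))     # True  -> the whole row is dropped
print(re.sub(brac_regexp, "", title_b).lstrip())  # "삼성전자 +1.5% 강세" -> tag removed, title kept
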
# resultEmbeding.pickle holds the event-embedding vectors written by the training
# script below (its outputVectorPath default); this snippet just inspects them.
import numpy as np
print(np.load("./resultEmbeding.pickle", allow_pickle=True))
D
\ No newline at end of file
{"class_name": "Sequential", "config": [{"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true, "batch_input_shape": [null, 5, 80], "dtype": "float32"}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.8}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 1024, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "rate": 0.8}}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "rate": 0.8}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 2, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}], "keras_version": "2.0.2", "backend": "theano"}
\ No newline at end of file
{"class_name": "Sequential", "config": [{"class_name": "Conv1D", "config": {"name": "conv1d_1", "trainable": true, "batch_input_shape": [null, 5, 80], "dtype": "float32", "filters": 128, "kernel_size": [1], "strides": [1], "padding": "valid", "dilation_rate": [1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Conv1D", "config": {"name": "conv1d_2", "trainable": true, "filters": 128, "kernel_size": [3], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "MaxPooling1D", "config": {"name": "max_pooling1d_1", "trainable": true, "strides": [2], "pool_size": [2], "padding": "valid"}}, {"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.8}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 2, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}], "keras_version": "2.0.2", "backend": "theano"}
\ No newline at end of file
{"class_name": "Sequential", "config": [{"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "batch_input_shape": [null, 5, 80], "dtype": "float32", "return_sequences": false, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 0.0010000000474974513}}, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.6}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 2, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 0.0010000000474974513}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}], "keras_version": "2.0.2", "backend": "theano"}
\ No newline at end of file
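
The three single-line JSON files above are Keras 2.0.2 architecture definitions (Theano backend) for the prediction models: a Flatten+Dense MLP, a Conv1D network, and an LSTM, each taking input of shape (5, 80) (presumably a 5-day window of event-embedding features) and ending in a 2-way softmax. A sketch of restoring one of them with keras.models.model_from_json; the filename is an assumption, and the trained weights would still have to be loaded separately:

from keras.models import model_from_json

with open("lstm_model.json") as f:   # assumed filename for the LSTM config above
    model = model_from_json(f.read())
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()                      # input (None, 5, 80), output: 2-way softmax
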
days Close
1 68.960003
2 64.560002
3 65.460003
4 69.800002
5 70.199998
6 70.650002
7 69.879997
8 70.459997
9 70.489998
10 70.759997
11 72.05
12 72.249998
13 72.64
14 73.979998
15 76.899998
16 77.529999
17 79.63
18 77.809999
19 78.840002
20 78.939998
21 80.900002
22 78.74
23 78.359998
24 81.210003
25 84.630001
26 88.410001
27 90.13
28 87.81
29 86.809998
30 85.290003
31 88.229997
32 88.929999
33 88.990001
34 44.859999
35 44.5
36 44.119998
37 41.789999
38 42.809999
39 42.75
40 40.53
41 39.349999
42 39.83
43 40.27
44 40.320002
45 40.96
46 41.180001
47 42.249999
48 42.959999
49 43.699999
50 42.829999
51 42.549999
52 42.500001
53 42.529999
54 41.750001
55 42.800002
56 41.669999
57 40.890001
58 41.089999
59 41.890001
60 42.330001
61 43.559999
62 43.74
63 41.920002
64 42.659998
65 41.040001
66 37.260002
67 35.350001
68 35.620001
69 37.090001
70 35.510002
71 37.18
72 35.500001
73 36.979999
74 36.190001
75 35.949998
76 35.539999
77 36.060001
78 36.429999
79 36.210001
80 37.149999
81 36.679998
82 37.240001
83 36.969998
84 36.419999
85 35.610001
86 34.13
87 34.770001
88 35.549999
89 35.360002
90 35.839999
91 37.550002
92 37.550002
93 39.759999
94 39.700001
95 39.779999
96 40.740001
97 40.56
98 39.759999
99 40.300001
100 40.039999
101 38.240001
102 37.92
103 36.539999
104 36.920001
105 37.650001
106 35.809999
107 35.900001
108 36
109 37.129999
110 37.980001
111 38.309999
112 37.61
113 37.859999
114 38.550001
115 38.889999
116 37.76
117 37.100001
118 37.309999
119 36.370001
120 36.810001
121 36.500001
122 37.980001
123 37.390001
124 37.63
125 38.250001
126 38.100001
127 38.240001
128 38.35
129 40.750001
130 41.55
131 41.489998
132 43.190001
133 43.630001
134 43.289999
135 43.999999
136 43.810001
137 43.630001
138 43.989999
139 43.800001
140 42.650002
141 42.75
142 43.190001
143 43.220001
144 42.71
145 42.989999
146 42.650002
147 43.820002
148 43.380002
149 43.999999
150 46.1
151 47.68
152 46.25
153 47.150001
154 46.300002
155 45.830001
156 45.869998
157 45.739999
158 45.77
159 46.059999
160 45.739999
161 45.840001
162 46.570001
163 46.889998
164 46.260001
165 46.22
166 48.799999
167 48.679999
168 49.780001
169 51.309999
170 51.400002
171 50.820002
172 49.610001
173 49.87
174 51.210001
175 52.64
176 53.189999
177 52.110001
178 51.9
179 53.199999
180 53.840001
181 53.440001
182 51.080001
183 52.34
184 53.609999
185 54.440001
186 53.749999
187 52.78
188 51.699998
189 51.299999
190 50.370001
191 51.589999
192 49.249999
193 53.739998
194 54.000001
195 53.440001
196 52.21
197 54.939999
198 56.140003
199 55.659999
200 56.789998
201 56.100002
202 57.029997
203 55.41
204 54.469998
205 57.590003
206 57.500001
207 59.949997
208 61.85
209 61.149998
210 60.229997
211 59.900002
212 60.110001
213 61.179998
214 61.54
215 61.449998
216 62.279997
217 64.949998
218 64.520001
219 64.560002
220 64.959998
221 66.52
222 67.110001
223 69.339998
224 69.659999
225 68.100003
226 67.819997
227 71.600003
228 72.63
229 71.820002
230 74.049999
231 73.949997
232 74.08
233 74.329999
234 74.909999
235 74.98
236 72.009999
237 72.180003
238 71.109999
239 71.379998
240 72.110002
241 73.5
242 74.019999
243 73.350003
244 74.229997
245 73.570002
246 71.45
247 71.890003
248 74.750002
249 74.97
250 74.38
251 76.299997
252 76.049998
253 80.860002
254 83.899998
255 84.29
256 85.590003
257 84.710003
258 82.489999
259 79.04
260 76.089999
261 77.670002
262 76.039998
263 74.200003
264 72.33
265 72.03
266 75.000001
267 75.509999
268 75.419997
269 72.100001
270 71.850002
271 67.299998
272 67.599998
273 68.809999
274 64.949998
275 67.309999
276 64.709999
277 67.639999
278 69.220002
279 70.57
280 70.29
281 69.079999
282 71.319997
283 71.75
284 71.46
285 70.990003
286 68.489999
287 69.099999
288 69.609998
289 67.720001
290 65.479997
291 66.310002
292 65.660001
293 63.930001
294 63.189998
295 65.680001
296 67.319999
297 66.23
298 64.310003
299 64.659998
300 63.990003
301 61.809999
302 61.670003
303 60.160002
304 59.959997
305 59.51
306 58.710002
307 62.329998
308 62.750001
309 62.72
310 62.649999
311 61.169998
312 67.210003
313 71.240002
314 69.790002
315 68.670003
316 67.990001
317 66.709998
318 66.469999
319 64.810001
320 66.22
321 65.650001
322 67.629999
323 67.039999
324 65.750003
325 66.169999
326 68.149998
327 69.359999
328 70.390002
329 69.599998
330 71.619997
331 71.139999
332 71.129999
333 71.890003
334 71.890003
335 71.029997
336 70.6
337 68.149998
338 67.700001
339 67.790003
340 64.979999
341 65.259998
342 63.179997
343 64.510001
344 63.380002
345 63.149997
346 63.340001
347 64.329997
348 63.549999
349 61.219999
350 59.77
351 62.170001
352 61.660003
353 59.999998
354 59.719998
355 58.559999
356 60.760002
357 59.240001
358 57.000003
359 58.33
360 57.609997
361 59.379997
362 57.560002
363 57.200001
364 57.47
365 57.860003
366 59.580002
367 58.829998
368 58.990002
369 57.429999
370 56.02
371 58.970001
372 57.270002
373 57.949998
374 57.000003
375 55.770001
376 55.4
377 55
378 55.649999
379 52.960001
380 52.250001
381 50.670002
382 52.37
383 52.899999
384 54.1
385 60.500003
386 60.720001
387 61.419997
388 61.930002
389 63.87
390 63.400002
391 65.589999
392 67.96
393 67.180002
394 68.159998
395 69.589997
396 68.300001
397 67.210003
398 64.780001
399 63.59
400 64.069998
401 63.650002
402 63.940001
403 66.449999
404 67.98
405 67.589998
406 67.909999
407 66.560001
408 67.619999
409 67.309999
410 67.810003
411 68.749998
412 66.979998
413 66.48
414 66.959997
415 67.849998
416 68.380003
417 71.48
418 70.030001
419 72.799997
420 72.519998
421 72.499997
422 72.63
423 74.200003
424 74.170002
425 74.1
426 73.890002
427 73.77
428 75.26
429 74.65
430 73.000002
431 75.749998
432 77.610001
433 76.41
434 77.01
435 76.98
436 74.859998
437 74.08
438 75.380003
439 74.829997
440 74.220003
441 74.629999
442 73.81
443 73.23
444 75.26
445 75.020001
446 75.399997
447 74.289998
448 74.529997
449 78.989999
450 79.950001
451 81.460002
452 81.049999
453 81.68
454 82.189999
455 80.409998
456 80.419998
457 81.08
458 79.160003
459 78.979999
460 78.290003
461 79.710002
462 80.51
463 82.449998
464 83.339998
465 83.12
466 84.350001
467 85.000003
468 84.050001
469 85.609997
470 85.850002
471 86.470003
472 88.599998
473 90.309998
474 91.630001
475 89.54
476 91.809999
477 91.799998
478 91.660002
479 91.320001
480 91.120003
481 91.27
482 89.83
483 87.040003
484 88.259997
485 88.750002
486 86.140002
487 89.050002
488 88.549997
489 87.719998
490 85.47
491 86.309999
492 84.759997
493 82.900002
494 82.199999
495 81.510003
496 81.520003
497 80.870002
498 84.839999
499 83.800002
500 85.659998
501 85.049997
502 85.47
503 92.570003
504 96.999997
505 95.800003
506 94.620003
507 97.099999
508 94.949997
509 89.070003
510 88.500003
511 86.789997
512 85.699999
513 86.700002
514 86.249998
515 85.379998
516 85.939998
517 85.550002
518 85.73
519 84.739997
520 84.749997
521 83.939999
522 84.149997
523 86.150002
524 86.180003
525 83.270003
526 84.88
527 84.700003
528 85.300003
529 85.210001
530 84.829999
531 85.899997
532 89.199999
533 89.509999
534 89.070003
535 88.510003
536 83.929998
537 84.610001
538 87.060003
539 85.409999
540 86.32
541 88.190002
542 87.719998
543 87.999998
544 87.969997
545 89.870001
546 88.400001
547 89.999997
548 89.570001
549 89.590001
550 91.130003
551 91.479998
552 93.869999
553 93.960001
554 93.519998
555 95.849998
556 95.460002
557 93.239998
558 93.750003
559 92.909997
560 93.650001
561 94.5
562 94.270001
563 94.679997
564 93.650001
565 94.250001
566 92.590003
567 92.190001
568 90.240003
569 91.430003
570 90.349998
571 90.4
572 90.270003
573 90.969999
574 93.509997
575 93.239998
576 95.349999
577 98.839999
578 99.919997
579 99.800001
580 99.47
581 100.390001
582 100.400002
583 100.809998
584 103.920002
585 105.060001
586 106.880003
587 107.34
588 108.739999
589 109.36
590 107.519998
591 107.34
592 109.440001
593 110.020001
594 111.98
595 113.539995
596 112.889994
597 110.689997
598 113.619997
599 114.35
600 118.769995
601 121.189997
602 118.4
603 121.33
604 122.670004
605 123.639999
606 124.069996
607 124.490005
608 120.190001
609 120.380005
610 117.500006
611 118.749994
612 120.499994
613 125.090006
614 123.66
615 121.550005
616 123.900005
617 122.999998
618 122.339996
619 119.650002
620 121.889999
621 120.559996
622 122.040003
623 121.259998
624 127.17
625 132.749994
626 132.299997
627 130.330006
628 132.349998
629 132.389999
630 134.069998
631 137.730001
632 138.099997
633 138.910002
634 138.119997
635 140
636 143.750006
637 143.700005
638 134.890003
639 137.260004
640 146.000004
641 143.849995
642 141.430006
643 131.759998
644 135.000006
645 136.49
646 131.85
647 135.249998
648 135.030006
649 134.009996
650 126.389996
651 125.000004
652 127.790001
653 124.029995
654 119.899994
655 117.049995
656 122.060003
657 122.219994
658 127.569996
659 132.510002
660 131.069996
661 135.299999
662 132.249996
663 126.820005
664 134.079998
665 136.249994
666 138.480005
667 144.160002
668 136.760006
669 135.010006
670 131.769999
671 136.710005
672 135.490004
673 136.849995
674 137.200003
675 138.809999
676 138.410004
677 140.919994
678 140.770004
679 140.309994
680 144.150002
681 148.280003
682 153.179995
683 152.769999
684 154.499998
685 153.470001
686 156.34
687 158.449995
688 157.919996
689 156.239998
690 161.449997
691 167.909998
692 167.859997
693 166.789999
694 162.230001
695 167.249996
696 166.980003
697 169.579996
698 172.750002
699 173.500006
700 170.420002
701 174.359999
702 186.160002
703 185.929996
704 182.780005
705 184.699995
706 185.090004
707 186.999994
708 189.949995
709 187.440004
710 187.870001
711 186.180002
712 191.789997
713 186.300005
714 175.469997
715 165.370007
716 153.759995
717 169.960005
718 166.109997
719 164.299995
720 166.390003
721 163.950001
722 168.850006
723 168.459997
724 171.540001
725 172.539997
726 174.809996
727 180.219999
728 184.289999
729 182.220005
730 178.859995
731 179.810003
732 185.5
733 189.949995
734 194.300001
735 194.209999
736 188.540003
737 190.860003
738 191.829998
739 190.390005
740 184.400002
741 182.979996
742 183.119999
743 187.209999
744 193.910006
745 198.799997
746 198.950001
747 198.570005
748 199.829994
749 198.079994
750 194.84
751 194.930002
752 180.049995
753 177.639994
754 171.249994
755 179.399994
756 178.020002
757 172.690001
758 178.780006
759 169.039997
760 159.639996
761 160.889997
762 161.359995
763 155.639997
764 139.070005
765 135.600006
766 130.009998
767 130.009998
768 131.540007
769 132.179995
770 135.360001
771 133.750004
772 131.649996
773 129.359997
774 122.000002
775 121.239998
776 125.480001
777 129.449999
778 124.860001
779 129.399998
780 127.459993
781 124.629995
782 122.180006
783 123.820004
784 121.540005
785 119.459997
786 119.740004
787 119.150003
788 122.959997
789 129.909996
790 125.020004
791 121.729996
792 124.619995
793 124.490005
794 120.930004
795 122.249994
796 119.690002
797 127.350004
798 126.030001
799 127.940004
800 126.610001
801 126.730003
802 132.819996
803 129.670004
804 133.270006
805 139.530003
806 140.979996
807 145.059996
808 140.250006
809 143.010002
810 143.5
811 149.530005
812 147.489998
813 151.609999
814 153.080006
815 155.890003
816 152.84
817 151.439995
818 154.549999
819 147.140003
820 147.780005
821 148.380005
822 153.700006
823 154.489998
824 161.040001
825 168.160004
826 160.199995
827 162.890003
828 168.939995
829 169.73
830 172.240004
831 175.050001
832 173.950003
833 179.999994
834 180.940002
835 184.729996
836 186.66
837 182.59
838 185.060003
839 183.450006
840 188.159994
841 189.959995
842 186.260004
843 189.730003
844 187.619995
845 183.599997
846 185.899996
847 178.190006
848 177.049994
849 181.169994
850 186.429995
851 187.009995
852 186.690001
853 188.749994
854 186.1
855 185.369997
856 185.190006
857 189.429996
858 185.640003
859 181.610004
860 185.640003
861 180.809999
862 173.26
863 172.370007
864 176.840002
865 181.43
866 178.750006
867 180.900002
868 175.270006
869 173.159998
870 173.25
871 177.390001
872 168.260006
873 170.089994
874 167.440001
875 174.680006
876 168.180004
877 170.119995
878 175.160004
879 179.549997
880 174.249996
881 176.629997
882 172.579998
883 173.880001
884 169.639997
885 172.810003
886 171.809994
887 165.150002
888 166.290001
889 162.019997
890 166.26
891 159.029995
892 162.119999
893 154.399996
894 157.080004
895 159.880001
896 158.950006
897 156.659994
898 153.229996
899 160.640005
900 164.190006
901 163.570005
902 169.549995
903 173.559994
904 176.73
905 179.300005
906 179.320005
907 175.740004
908 175.389996
909 173.530006
910 175.840006
911 174.289997
912 176.790001
913 172.549997
914 173.639996
915 174.670006
916 173.739998
917 169.529995
918 166.189999
919 166.960003
920 161.220005
921 160.179995
922 157.919996
923 151.68
924 151.609999
925 152.649996
926 148.940004
927 140.359995
928 139.879997
929 127.830002
930 134.089998
931 140.909994
932 131.049995
933 126.840006
934 128.709995
935 131.930002
936 128.239998
937 105.259999
938 113.659998
939 109.120001
940 100.100001
941 97.069999
942 98.140003
943 89.159998
944 89.789999
945 88.740002
946 96.799999
947 110.26
948 104.079999
949 97.949999
950 101.890002
951 97.4
952 98.439997
953 91.489998
954 96.870001
955 98.229999
956 96.380003
957 92.089998
958 99.909997
959 104.550003
960 111.039998
961 107.589999
962 106.959998
963 110.989997
964 103.300001
965 99.099998
966 98.239999
967 95.879998
968 94.77
969 90.12
970 96.439998
971 90.240003
972 88.140001
973 89.910002
974 86.289999
975 80.49
976 82.580001
977 92.949998
978 90.800002
979 94.999998
980 92.669998
981 88.929999
982 92.47
983 95.899999
984 91.410003
985 94.000002
986 99.719999
987 100.06
988 98.209998
989 94.999998
990 98.27
991 94.749999
992 95.430001
993 89.159998
994 89.429997
995 89.999997
996 85.74
997 86.380001
998 85.039997
999 85.810001
1000 86.61
1001 86.289999
1002 85.349998
1003 90.750001
1004 94.580002
1005 93.02
1006 91.01
1007 92.699999
1008 90.579997
1009 88.66
1010 87.709998
1011 85.329997
1012 83.379999
1013 82.330002
1014 78.200001
1015 82.83
1016 88.36
1017 88.36
1018 89.640002
1019 90.73
1020 94.2
1021 92.999999
1022 90.13
1023 91.509998
1024 92.979999
1025 93.549998
1026 96.459998
1027 99.719999
1028 102.510003
1029 97.830003
1030 96.82
1031 99.270002
1032 99.16
1033 94.530001
1034 94.369997
1035 90.639998
1036 91.199998
1037 86.950001
1038 90.250003
1039 91.159997
1040 89.189999
1041 89.310001
1042 87.939997
1043 88.37
1044 91.169997
1045 88.839997
1046 85.300003
1047 83.11
1048 88.629999
1049 92.679998
1050 96.350002
1051 95.929999
1052 95.420001
1053 99.659998
1054 101.52
1055 101.620003
1056 101.590002
1057 107.660001
1058 106.500001
1059 106.490001
1060 109.869998
1061 106.850002
1062 104.490002
1063 105.120003
1064 108.689998
1065 112.710003
1066 115.989998
1067 118.450001
1068 115.000002
1069 116.320005
1070 119.57
1071 120.220001
1072 118.309998
1073 117.639996
1074 121.450003
1075 123.419994
1076 120.499994
1077 121.759996
1078 121.510004
1079 125.4
1080 123.900005
1081 124.729998
1082 123.900005
1083 125.139994
1084 125.829996
1085 127.240002
1086 132.070005
1087 132.709993
1088 132.500002
1089 129.060003
1090 129.190006
1091 129.570002
1092 124.420004
1093 119.489998
1094 122.949997
1095 122.419998
1096 126.650002
1097 127.450006
1098 125.869997
1099 124.179998
1100 122.5
1101 130.780003
1102 133.050001
1103 135.069994
1104 135.809998
1105 139.349998
1106 139.490002
1107 140.949995
1108 143.740005
1109 144.67
1110 143.849995
1111 142.719995
1112 140.250006
1113 139.949999
1114 136.969997
1115 136.090004
1116 136.349997
1117 135.580006
1118 135.879999
1119 139.480001
1120 137.370007
1121 134.009996
1122 136.219994
1123 139.859997
1124 142.440002
1125 141.970005
1126 142.430002
1127 142.829998
1128 140.02
1129 138.609995
1130 135.400002
1131 137.220003
1132 136.359997
1133 138.520006
1134 142.34
1135 142.269999
1136 146.879997
1137 147.519999
1138 151.750002
1139 152.910002
1140 151.509996
1141 156.739996
1142 157.819994
1143 159.990004
1144 160.100006
1145 160.000004
1146 160.030005
1147 162.790001
1148 163.390001
1149 166.430004
1150 165.549997
1151 165.110001
1152 163.91
1153 165.509996
1154 164.720005
1155 162.830002
1156 165.310005
1157 168.419996
1158 166.779999
1159 159.589994
1160 164.000002
1161 164.600002
1162 166.330002
1163 169.220001
1164 169.059998
1165 169.400005
1166 167.41
1167 169.450006
1168 170.049994
1169 168.210005
1170 165.300005
1171 165.180002
1172 166.549994
1173 170.309999
1174 172.930006
1175 171.140005
1176 172.559998
1177 172.160002
1178 173.719997
1179 175.160004
1180 181.869997
1181 184.550005
1182 185.020002
1183 184.020006
1184 184.480003
1185 185.5
1186 183.820002
1187 182.369995
1188 186.150002
1189 185.379997
1190 185.349997
1191 180.860001
1192 184.9
1193 186.019999
1194 190.009996
1195 190.250002
1196 189.270006
1197 190.469994
1198 190.810001
1199 190.019997
1200 191.289999
1201 190.559996
1202 188.050005
1203 189.860006
1204 198.759996
1205 204.920004
1206 205.199997
1207 203.939995
1208 202.480001
1209 197.370005
1210 192.399998
1211 196.349995
1212 188.500002
1213 189.309994
1214 188.749994
1215 190.810001
1216 194.029995
1217 194.340002
1218 201.460005
1219 202.98
1220 203.250006
1221 201.990004
1222 204.450006
1223 206.630003
1224 206.999998
1225 205.960001
1226 200.509996
1227 199.919996
1228 205.879999
1229 204.440006
1230 204.190001
1231 200.589998
1232 199.909996
1233 196.969995
1234 196.230005
1235 196.479998
1236 193.320005
1237 188.949999
1238 189.870007
1239 197.800001
1240 196.429996
1241 194.669996
1242 196.979996
1243 194.169998
1244 195.030005
1245 191.859999
1246 195.43
1247 198.229998
1248 200.360006
1249 202.100006
1250 209.040005
1251 211.609997
1252 209.100006
1253 211.639997
1254 210.730003
days ReturnClose
1 -0.41877255
2 -6.38051161
3 1.39405355
4 6.63000122
5 0.57306016
6 0.64103136
7 -1.08988673
8 0.82999431
9 0.04257877
10 0.38303165
11 1.82306819
12 0.27758223
13 0.53979517
14 1.8447109
15 3.94701281
16 0.8192471
17 2.70863024
18 -2.28557202
19 1.32374118
20 0.12683409
21 2.48290353
22 -2.66996532
23 -0.48260351
24 3.6370662
25 4.21130141
26 4.46650119
27 1.94548013
28 -2.57405969
29 -1.13882474
30 -1.75094463
31 3.44705581
32 0.79338323
33 0.06747105
34 -49.58984325
35 -0.80249444
36 -0.85393708
37 -5.28104965
38 2.44077536
39 -0.14015184
40 -5.19298246
41 -2.9114261
42 1.21982468
43 1.10469495
44 0.12416687
45 1.58729655
46 0.53711182
47 2.59834379
48 1.68047341
49 1.72253263
50 -1.99084673
51 -0.65374739
52 -0.11750412
53 0.07058353
54 -1.83399487
55 2.51497239
56 -2.6401938
57 -1.8718455
58 0.48911224
59 1.94695064
60 1.05036999
61 2.90573582
62 0.41322545
63 -4.1609465
64 1.76525755
65 -3.7974615
66 -9.21052365
67 -5.12614304
68 0.76379064
69 4.12689489
70 -4.25990552
71 4.70289469
72 -4.51855568
73 4.16900833
74 -2.13628454
75 -0.66317489
76 -1.14047016
77 1.46314579
78 1.02606209
79 -0.60389241
80 2.59596237
81 -1.26514404
82 1.52672582
83 -0.72503489
84 -1.4876901
85 -2.22404729
86 -4.15613861
87 1.87518605
88 2.24330738
89 -0.53445009
90 1.3574575
91 4.77121386
92 0
93 5.88547772
94 -0.15090041
95 0.20150629
96 2.41327809
97 -0.44182866
98 -1.97238905
99 1.35815396
100 -0.64516624
101 -4.49549961
102 -0.83682268
103 -3.63924314
104 1.03996171
105 1.97724805
106 -4.88712338
107 0.25133204
108 0.27854874
109 3.13888611
110 2.28925942
111 0.86887307
112 -1.8271966
113 0.66471417
114 1.82250929
115 0.88196625
116 -2.90562877
117 -1.74787871
118 0.56603233
119 -2.51942649
120 1.20978825
121 -0.84216243
122 4.05479441
123 -1.55344914
124 0.64188016
125 1.64762424
126 -0.39215685
127 0.36745406
128 0.28765428
129 6.25815124
130 1.96318768
131 -0.14440915
132 4.09738029
133 1.01875432
134 -0.77928488
135 1.64010168
136 -0.43181365
137 -0.41086509
138 0.82511573
139 -0.43191181
140 -2.62556843
141 0.23446189
142 1.02924211
143 0.06946052
144 -1.18001154
145 0.65558183
146 -0.79087464
147 2.74325896
148 -1.00410767
149 1.42922308
150 4.77272965
151 3.42733189
152 -2.99916107
153 1.94594811
154 -1.802755
155 -1.01512091
156 0.08727253
157 -0.28340747
158 0.06559029
159 0.63360061
160 -0.694746
161 0.2186314
162 1.5924956
163 0.68713119
164 -1.34356372
165 -0.08646995
166 5.58199697
167 -0.24590164
168 2.25965904
169 3.07351942
170 0.17541025
171 -1.12840463
172 -2.38095425
173 0.52408586
174 2.68698817
175 2.79242135
176 1.04483093
177 -2.03045313
178 -0.40299558
179 2.50481503
180 1.2030113
181 -0.74294204
182 -4.41616758
183 2.46671687
184 2.42644058
185 1.54822238
186 -1.26745405
187 -1.80464934
188 -2.04623342
189 -0.77369249
190 -1.81286163
191 2.42207261
192 -4.53576283
193 9.11674942
194 0.48381654
195 -1.03703702
196 -2.30164853
197 5.22888144
198 2.18420827
199 -0.85501242
200 2.0301815
201 -1.21499564
202 1.65774504
203 -2.84060509
204 -1.69644829
205 5.72793302
206 -0.1562806
207 4.26086253
208 3.16931292
209 -1.13177365
210 -1.50449882
211 -0.54789144
212 0.35058263
213 1.78006485
214 0.58843088
215 -0.14624959
216 1.35069004
217 4.28709237
218 -0.66204313
219 0.06199783
220 0.61957247
221 2.40148099
222 0.8869528
223 3.32289818
224 0.46149554
225 -2.23944304
226 -0.41116885
227 5.57358621
228 1.43854324
229 -1.11523888
230 3.10498042
231 -0.13504659
232 0.17579852
233 0.33747165
234 0.78030406
235 0.0934468
236 -3.96105762
237 0.23608388
238 -1.48241058
239 0.37969203
240 1.02270107
241 1.92760777
242 0.70748163
243 -0.90515538
244 1.19971911
245 -0.88912168
246 -2.88161199
247 0.61581945
248 3.97829862
249 0.2943117
250 -0.78698146
251 2.58133504
252 -0.3276527
253 6.32479175
254 3.75957943
255 0.46484149
256 1.54229802
257 -1.02815746
258 -2.62071057
259 -4.18232397
260 -3.73228871
261 2.07649234
262 -2.09862747
263 -2.41977255
264 -2.52021957
265 -0.41476566
266 4.12328335
267 0.67999732
268 -0.11919216
269 -4.40201025
270 -0.34673925
271 -6.33264283
272 0.44576524
273 1.78994236
274 -5.6096513
275 3.63356593
276 -3.86272476
277 4.52789375
278 2.33590039
279 1.95030043
280 -0.39676917
281 -1.72144117
282 3.24261441
283 0.60292066
284 -0.40418118
285 -0.65770641
286 -3.52162825
287 0.89064098
288 0.73805934
289 -2.71512291
290 -3.3077436
291 1.26757031
292 -0.98024579
293 -2.63478522
294 -1.15752071
295 3.94050179
296 2.49695185
297 -1.61913104
298 -2.89898384
299 0.54423104
300 -1.0361816
301 -3.4067884
302 -0.2264941
303 -2.4485178
304 -0.33245511
305 -0.75049537
306 -1.34430852
307 6.16589316
308 0.67383766
309 -0.04781036
310 -0.11160874
311 -2.36233204
312 9.8741298
313 5.99612977
314 -2.03537333
315 -1.60481296
316 -0.99024606
317 -1.88263418
318 -0.35976466
319 -2.49736426
320 2.17558861
321 -0.86076563
322 3.01599081
323 -0.87239392
324 -1.92421841
325 0.63877716
326 2.99229111
327 1.77549675
328 1.48501011
329 -1.12232416
330 2.9022975
331 -0.67020109
332 -0.01405679
333 1.06847183
334 0
335 -1.19628038
336 -0.60537381
337 -3.47025779
338 -0.66030376
339 0.13294239
340 -4.14515987
341 0.43090028
342 -3.18725263
343 2.10510298
344 -1.75166483
345 -0.36289838
346 0.30087729
347 1.56298703
348 -1.212495
349 -3.66640446
350 -2.36850543
351 4.01539401
352 -0.82032812
353 -2.69219092
354 -0.46666668
355 -1.94239625
356 3.75683579
357 -2.50164738
358 -3.78122546
359 2.33332795
360 -1.23436139
361 3.07238343
362 -3.06499679
363 -0.62543604
364 0.47202622
365 0.67862015
366 2.97269082
367 -1.25881835
368 0.27197689
369 -2.64452102
370 -2.45516111
371 5.26597822
372 -2.88282003
373 1.1873511
374 -1.63933569
375 -2.15789813
376 -0.66344091
377 -0.72202166
378 1.18181636
379 -4.83377906
380 -1.34063442
381 -3.02392147
382 3.35503835
383 1.01202788
384 2.26843294
385 11.82995009
386 0.36363304
387 1.15282607
388 0.8303566
389 3.1325657
390 -0.7358666
391 3.45425383
392 3.61335727
393 -1.14773102
394 1.45876149
395 2.09800329
396 -1.85370895
397 -1.59589749
398 -3.61553622
399 -1.83698824
400 0.75483252
401 -0.65552679
402 0.45561507
403 3.92555202
404 2.30248461
405 -0.57370109
406 0.47344431
407 -1.98792228
408 1.59254505
409 -0.45844425
410 0.74283763
411 1.38621879
412 -2.57454553
413 -0.74648853
414 0.72201715
415 1.32915329
416 0.78114225
417 4.53348474
418 -2.02853805
419 3.9554419
420 -0.38461403
421 -0.02757998
422 0.17931449
423 2.16164533
424 -0.04043261
425 -0.09438047
426 -0.28339811
427 -0.16240628
428 2.01979124
429 -0.81052352
430 -2.21031212
431 3.76711771
432 2.45544957
433 -1.54619377
434 0.78523753
435 -0.03895598
436 -2.75396467
437 -1.04194232
438 1.75486366
439 -0.72964444
440 -0.81517309
441 0.55240634
442 -1.09875253
443 -0.78580138
444 2.77208794
445 -0.31889317
446 0.50652625
447 -1.47214727
448 0.32305695
449 5.98417037
450 1.21534626
451 1.88868165
452 -0.50331818
453 0.77729921
454 0.62438663
455 -2.16571483
456 0.01243626
457 0.82069388
458 -2.36802787
459 -0.22739261
460 -0.87363384
461 1.81376797
462 1.00363565
463 2.40963607
464 1.07944211
465 -0.26397649
466 1.47978946
467 0.77060106
468 -1.11764937
469 1.85603329
470 0.28034693
471 0.72219101
472 2.4632762
473 1.93002262
474 1.46163551
475 -2.28091343
476 2.53517869
477 -0.01089315
478 -0.15250109
479 -0.37093715
480 -0.21900788
481 0.16461479
482 -1.57773639
483 -3.1058633
484 1.40164747
485 0.55518357
486 -2.940845
487 3.37822142
488 -0.56148792
489 -0.93732245
490 -2.56497726
491 0.98279981
492 -1.7958545
493 -2.19442551
494 -0.84439443
495 -0.8394112
496 0.01226843
497 -0.79735154
498 4.90910956
499 -1.22583335
500 2.21956558
501 -0.71211886
502 0.49383071
503 8.30701182
504 4.78556104
505 -1.23710725
506 -1.23173274
507 2.62100605
508 -2.21421423
509 -6.19272689
510 -0.63994609
511 -1.9322101
512 -1.2559028
513 1.16686466
514 -0.51903574
515 -1.00869568
516 0.65589132
517 -0.45380034
518 0.21040093
519 -1.15479179
520 0.0118008
521 -0.95574989
522 0.25017632
523 2.37671429
524 0.03482414
525 -3.3766534
526 1.93346576
527 -0.21206056
528 0.7083825
529 -0.10551231
530 -0.44595939
531 1.26134388
532 3.84167883
533 0.34753364
534 -0.49156072
535 -0.62871896
536 -5.17456202
537 0.81020257
538 2.89564114
539 -1.89524919
540 1.06545019
541 2.16636006
542 -0.53294477
543 0.31919745
544 -0.03409205
545 2.15983183
546 -1.63569599
547 1.80995021
548 -0.47777335
549 0.0223289
550 1.71894406
551 0.38406122
552 2.61259407
553 0.09587941
554 -0.46828756
555 2.49144573
556 -0.40688159
557 -2.32558554
558 0.54698092
559 -0.89600637
560 0.79647403
561 0.90763373
562 -0.24338519
563 0.43491672
564 -1.08787076
565 0.64068339
566 -1.76127107
567 -0.43201424
568 -2.11519468
569 1.31870563
570 -1.18123697
571 0.05534256
572 -0.14380199
573 0.77544697
574 2.79212711
575 -0.28873811
576 2.26297838
577 3.6601993
578 1.09267302
579 -0.12009208
580 -0.33066232
581 0.92490299
582 0.00996214752503349
583 0.40836254
584 3.08501544
585 1.09699671
586 1.73234531
587 0.4303864
588 1.30426588
589 0.5701683
590 -1.68251829
591 -0.16740886
592 1.95640116
593 0.52997076
594 1.78149335
595 1.39310145
596 -0.57248637
597 -1.94879716
598 2.64703232
599 0.64249518
600 3.86532138
601 2.03755334
602 -2.30216773
603 2.47466216
604 1.10442924
605 0.79073528
606 0.34778147
607 0.33852584
608 -3.45409577
609 0.15808636
610 -2.39242306
611 1.06381952
612 1.47368428
613 3.80913878
614 -1.14318165
615 -1.7062874
616 1.93336068
617 -0.72639787
618 -0.536587
619 -2.19878542
620 1.8721245
621 -1.09115023
622 1.22761036
623 -0.63913879
624 4.87382657
625 4.3878226
626 -0.33898081
627 -1.48903329
628 1.54990555
629 0.03022365
630 1.26897727
631 2.72991949
632 0.26863864
633 0.58653513
634 -0.56871715
635 1.36113745
636 2.67857571
637 -0.0347833
638 -6.13082929
639 1.75698788
640 6.36747759
641 -1.47260886
642 -1.68230037
643 -6.83731004
644 2.4590225
645 1.10369921
646 -3.39951645
647 2.57868639
648 -0.16265582
649 -0.75539506
650 -5.68614299
651 -1.09976426
652 2.23199753
653 -2.94233193
654 -3.3298405
655 -2.3769801
656 4.28022914
657 0.1310757
658 4.37735417
659 3.87238861
660 -1.08671495
661 3.22728552
662 -2.25425205
663 -4.10585343
664 5.72464336
665 1.6184338
666 1.63670539
667 4.10167302
668 -5.1331825
669 -1.27961387
670 -2.39982731
671 3.7489611
672 -0.89240067
673 1.00375744
674 0.25576033
675 1.17346645
676 -0.28816008
677 1.81344551
678 -0.10643628
679 -0.32678127
680 2.73680291
681 2.86507176
682 3.30455348
683 -0.26765636
684 1.13242064
685 -0.66666473
686 1.87007166
687 1.34961942
688 -0.33448975
689 -1.06382855
690 3.33461282
691 4.00123947
692 -0.02977845
693 -0.63743478
694 -2.73397567
695 3.09436909
696 -0.1614308
697 1.55706848
698 1.8693278
699 0.43415571
700 -1.77521838
701 2.31193343
702 6.76760901
703 -0.12355286
704 -1.69418118
705 1.05043766
706 0.2111581
707 1.03192499
708 1.57754069
709 -1.32139567
710 0.22940514
711 -0.89955767
712 3.0132103
713 -2.86250174
714 -5.81320865
715 -5.75596408
716 -7.02062739
717 10.53590695
718 -2.26524352
719 -1.08964062
720 1.27206821
721 -1.46643546
722 2.9887191
723 -0.23097956
724 1.82832961
725 0.58295208
726 1.3156364
727 3.09479041
728 2.25835092
729 -1.12322644
730 -1.84393036
731 0.53114616
732 3.16444964
733 2.39891914
734 2.29007955
735 -0.04632115
736 -2.91951806
737 1.2305081
738 0.5082233
739 -0.75066101
740 -3.14617514
741 -0.77006832
742 0.07651274
743 2.23350809
744 3.57887241
745 2.52178374
746 0.07545473
747 -0.19100075
748 0.63453138
749 -0.87574441
750 -1.63569977
751 0.04619277
752 -7.63351298
753 -1.33851767
754 -3.59716292
755 4.75912425
756 -0.76922634
757 -2.99404614
758 3.52655334
759 -5.44804154
760 -5.5608147
761 0.78301242
762 0.29212382
763 -3.54486749
764 -10.64635847
765 -2.49514552
766 -4.1224246
767 0
768 1.17683949
769 0.48653487
770 2.40581489
771 -1.18941858
772 -1.57009939
773 -1.73945998
774 -5.68954481
775 -0.62295409
776 3.49719818
777 3.16384919
778 -3.54576905
779 3.63606997
780 -1.49923109
781 -2.22030296
782 -1.96581008
783 1.34228018
784 -1.84138179
785 -1.71137725
786 0.23439395
787 -0.49273508
788 3.1976449
789 5.65224396
790 -3.76413837
791 -2.63158526
792 2.37410589
793 -0.1043091
794 -2.85966813
795 1.09153226
796 -2.09406309
797 6.39986789
798 -1.03651587
799 1.51551455
800 -1.0395521
801 0.09478082
802 4.80548635
803 -2.37162483
804 2.7762797
805 4.69722872
806 1.039198
807 2.8940276
808 -3.31586249
809 1.9679115
810 0.34263198
811 4.20209408
812 -1.36427936
813 2.79341044
814 0.96959766
815 1.83563946
816 -1.95650968
817 -0.91599385
818 2.0536213
819 -4.79456231
820 0.43496125
821 0.40600892
822 3.58538942
823 0.51398306
824 4.23975862
825 4.42126363
826 -4.7335923
827 1.67915611
828 3.71415795
829 0.46762461
830 1.47882166
831 1.63144272
832 -0.62839074
833 3.47800569
834 0.52222668
835 2.09461366
836 1.04477023
837 -2.18043502
838 1.35275919
839 -0.86998648
840 2.56745045
841 0.95663322
842 -1.94777379
843 1.86298665
844 -1.11211088
845 -2.14262771
846 1.25272279
847 -4.14738578
848 -0.63977325
849 2.32702634
850 2.90335109
851 0.31110874
852 -0.17111064
853 1.10342974
854 -1.40397038
855 -0.39226384
856 -0.09709824
857 2.289535
858 -2.00073541
859 -2.17086777
860 2.2190402
861 -2.60181207
862 -4.17565347
863 -0.51367482
864 2.59325568
865 2.59556545
866 -1.47715042
867 1.20279492
868 -3.11221445
869 -1.20386143
870 0.05197621
871 2.38961097
872 -5.14684872
873 1.08759535
874 -1.55799465
875 4.32393989
876 -3.72109101
877 1.15352061
878 2.96262
879 2.50627592
880 -2.95182461
881 1.36585426
882 -2.29292819
883 0.75327559
884 -2.43846559
885 1.86866662
886 -0.57867541
887 -3.87637054
888 0.69028095
889 -2.56780563
890 2.61696277
891 -4.34861362
892 1.94303219
893 -4.76190664
894 1.73575652
895 1.78252924
896 -0.58168313
897 -1.44071212
898 -2.18945368
899 4.83587365
900 2.20991091
901 -0.3776119
902 3.6559209
903 2.36508353
904 1.82646123
905 1.45419849
906 0.01115449
907 -1.99643146
908 -0.19916239
909 -1.06048808
910 1.33118188
911 -0.88148825
912 1.43439328
913 -2.39832795
914 0.63170039
915 0.59318707
916 -0.53243715
917 -2.4231628
918 -1.97015047
919 0.46332752
920 -3.43794795
921 -0.64508744
922 -1.41091214
923 -3.95136535
924 -0.04615045
925 0.68596861
926 -2.43039115
927 -5.7607149
928 -0.34197636
929 -8.61452335
930 4.89712579
931 5.08613327
932 -6.9973738
933 -3.21250604
934 1.47428959
935 2.50175365
936 -2.79694076
937 -17.91952539
938 7.98023853
939 -3.9943666
940 -8.26612896
941 -3.02697499
942 1.10230144
943 -9.15019842
944 0.70659602
945 -1.16939193
946 9.08270996
947 13.90495985
948 -5.6049347
949 -5.88970029
950 4.02246354
951 -4.406715
952 1.06775873
953 -7.06013735
954 5.8804275
955 1.40394135
956 -1.88333098
957 -4.45113599
958 8.49169201
959 4.64418591
960 6.20755123
961 -3.10698763
962 -0.58555721
963 3.76776279
964 -6.92854871
965 -4.06583055
966 -0.8678093
967 -2.40228117
968 -1.15769506
969 -4.90661602
970 7.01286951
971 -6.42886264
972 -2.3271298
973 2.00816993
974 -4.02625172
975 -6.72151937
976 2.59659709
977 12.5575162
978 -2.31306729
979 4.62554615
980 -2.45263163
981 -4.03582506
982 3.98066011
983 3.70931005
984 -4.68195625
985 2.83338684
986 6.08510306
987 0.34095568
988 -1.84889266
989 -3.26850633
990 3.44210744
991 -3.58196906
992 0.71768022
993 -6.570264
994 0.30282526
995 0.63737003
996 -4.73333016
997 0.7464439
998 -1.55128963
999 0.90546099
1000 0.9322911
1001 -0.3694735
1002 -1.08935104
1003 6.32689294
1004 4.22038673
1005 -1.64939942
1006 -2.16082563
1007 1.8569377
1008 -2.28694932
1009 -2.11966998
1010 -1.07151139
1011 -2.71348883
1012 -2.28524325
1013 -1.25929121
1014 -5.01639852
1015 5.92071476
1016 6.676325
1017 0
1018 1.44862155
1019 1.21597275
1020 3.82453433
1021 -1.27388641
1022 -3.08602046
1023 1.53111949
1024 1.60638294
1025 0.61303399
1026 3.11063609
1027 3.37964034
1028 2.79783797
1029 -4.56540812
1030 -1.03240618
1031 2.53047098
1032 -0.11081092
1033 -4.66922045
1034 -0.16926267
1035 -3.95252635
1036 0.61782879
1037 -4.66008453
1038 3.7952869
1039 1.00830357
1040 -2.16103342
1041 0.13454648
1042 -1.53398722
1043 0.48897318
1044 3.1684927
1045 -2.55566532
1046 -3.98468496
1047 -2.56741257
1048 6.64179882
1049 4.56955776
1050 3.95986629
1051 -0.43591385
1052 -0.53163557
1053 4.4435097
1054 1.86634762
1055 0.09850571
1056 -0.02952273
1057 5.97499644
1058 -1.07746609
1059 -0.00938967127332788
1060 3.1740041
1061 -2.74869942
1062 -2.20870375
1063 0.60292946
1064 3.39611387
1065 3.698597
1066 2.91011881
1067 2.12087511
1068 -2.91262049
1069 1.14782868
1070 2.79401209
1071 0.54361546
1072 -1.58875643
1073 -0.56631055
1074 3.23870038
1075 1.62205924
1076 -2.36590515
1077 1.04564487
1078 -0.20531538
1079 3.2013792
1080 -1.19616826
1081 0.6698894
1082 -0.66543174
1083 1.00079818
1084 0.55138408
1085 1.12056429
1086 3.79597841
1087 0.4845824
1088 -0.158233
1089 -2.59622562
1090 0.10073067
1091 0.2941373
1092 -3.97468389
1093 -3.96239016
1094 2.89563901
1095 -0.43106874
1096 3.45532108
1097 0.63166521
1098 -1.23970885
1099 -1.34265436
1100 -1.35287327
1101 6.75918612
1102 1.73573784
1103 1.51822096
1104 0.54786706
1105 2.60658276
1106 0.10046932
1107 1.04666498
1108 1.97943249
1109 0.64699803
1110 -0.56681067
1111 -0.78554052
1112 -1.73065379
1113 -0.21390873
1114 -2.12933335
1115 -0.64247136
1116 0.19104489
1117 -0.56471655
1118 0.2212664
1119 2.64939802
1120 -1.51275737
1121 -2.44595678
1122 1.64912922
1123 2.67215032
1124 1.84470546
1125 -0.32996138
1126 0.32400999
1127 0.2808369
1128 -1.96737243
1129 -1.00700257
1130 -2.31584526
1131 1.34416615
1132 -0.62673516
1133 1.58404888
1134 2.75772007
1135 -0.04917873
1136 3.24031632
1137 0.43573122
1138 2.86740986
1139 0.76441515
1140 -0.91557516
1141 3.45191746
1142 0.68903791
1143 1.37499055
1144 0.06875555
1145 -0.06246221
1146 0.01875062
1147 1.72467407
1148 0.36857301
1149 1.86058081
1150 -0.52875502
1151 -0.26577832
1152 -0.7267888
1153 0.97614301
1154 -0.47730712
1155 -1.14740344
1156 1.52306268
1157 1.88130839
1158 -0.97375433
1159 -4.3110715
1160 2.76333615
1161 0.36585365
1162 1.05103279
1163 1.73750915
1164 -0.09455324
1165 0.20111617
1166 -1.17473727
1167 1.21856878
1168 0.35407966
1169 -1.08202827
1170 -1.72998033
1171 -0.07259709
1172 0.82939338
1173 2.25758339
1174 1.53837532
1175 -1.03510145
1176 0.82972593
1177 -0.23180112
1178 0.90613091
1179 0.82892415
1180 3.8307792
1181 1.47358445
1182 0.2546719
1183 -0.54047994
1184 0.24997119
1185 0.55290383
1186 -0.9056593
1187 -0.78881894
1188 2.07271322
1189 -0.41364759
1190 -0.01618298
1191 -2.42244191
1192 2.23377141
1193 0.60573229
1194 2.14492905
1195 0.1263123
1196 -0.51510959
1197 0.63400854
1198 0.17850948
1199 -0.41402652
1200 0.66835176
1201 -0.3816211
1202 -1.31716575
1203 0.96251048
1204 4.68765918
1205 3.09921922
1206 0.13663527
1207 -0.61403607
1208 -0.71589391
1209 -2.52370406
1210 -2.51811667
1211 2.05301302
1212 -3.99795936
1213 0.42970397
1214 -0.29581111
1215 1.09139447
1216 1.68753943
1217 0.15977272
1218 3.66368371
1219 0.75448971
1220 0.13302099
1221 -0.61992716
1222 1.21788304
1223 1.06627387
1224 0.17906161
1225 -0.50241401
1226 -2.6461473
1227 -0.29424967
1228 2.98119404
1229 -0.69943317
1230 -0.12228771
1231 -1.76306527
1232 -0.33900095
1233 -1.47066233
1234 -0.37568666
1235 0.12739795
1236 -1.60830264
1237 -2.26050377
1238 0.48690553
1239 4.17653853
1240 -0.69262133
1241 -0.8959935
1242 1.18662354
1243 -1.42653978
1244 0.44291446
1245 -1.625394
1246 1.86073231
1247 1.43273704
1248 1.07451345
1249 0.86843679
1250 3.433943
1251 1.22942592
1252 -1.18614009
1253 1.21472546
1254 -0.4299726
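
The two data files above hold a daily closing-price series ("days Close") and its day-over-day percentage return ("days ReturnClose"): for example, day 2's ReturnClose is (64.560002 - 68.960003) / 68.960003 * 100 ≈ -6.3805, and the first ReturnClose entry presumably uses a close from the day before the series starts. A small pandas sketch of the same computation; the filename and whitespace separator are assumptions:

import pandas as pd

close = pd.read_csv("close.txt", sep=r"\s+")            # assumed filename for the "days Close" file
returns = (close["Close"].pct_change() * 100).round(8)
print(returns.iloc[1])                                  # ≈ -6.3805, matching ReturnClose for day 2
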
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 23:41:51 2017
@author: red-sky
"""
import numpy as np
import theano
from theano import tensor as T


class EmbeddingLayer(object):
    def __init__(self, num_vocab, word_dim, rng, embedding_w=None):
        '''
        word_dim :: dimension of the word embeddings
        num_vocab :: number of word embeddings in the vocabulary
        embedding_w :: pre-trained word vectors
        '''
        if embedding_w is None:
            word_vectors = rng.uniform(-1.0, 1.0, (num_vocab, word_dim))
        else:
            word_vectors = embedding_w
        # cast to floatX before wrapping, so embedding_w stays a true shared
        # variable that can be updated during training
        self.embedding_w = theano.shared(
            np.asarray(word_vectors, dtype=theano.config.floatX),
            name="EmbeddingLayer_W"
        )
        self.params = [self.embedding_w]
        self.infor = [num_vocab, word_dim]

    def words_ind_2vec(self, index):
        # look up one word vector per index and average them into one vector
        map_word_vectors = self.embedding_w[index]
        output = T.mean(map_word_vectors, axis=0)
        return output, map_word_vectors


if __name__ == "__main__":
    rng = np.random.RandomState(220495)
    arrWords = T.ivector("words")
    EMBD = EmbeddingLayer(100, 150, rng=rng)
    Word2Vec = theano.function(
        inputs=[arrWords],
        outputs=EMBD.words_ind_2vec(arrWords)
    )
    # words_ind_2vec returns (mean vector, per-word vectors)
    Vec, WordVecs = Word2Vec([1, 2, 3, 4])
    Vec, WordVecs = Word2Vec([2, 3, 4])
    print("Dim: ", Vec.shape)
    print("Val: ", Vec)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 17:36:33 2017
@author: red-sky
"""
import sys
import json
import theano
import pickle
import os.path
import numpy as np
import theano.tensor as T
from SmallUtils import createShareVar, ADAM_OPTIMIZER
from EmbeddingLayer import EmbeddingLayer
from RoleDependentLayer import RoleDependentLayer


class Input(object):
    def __init__(self, object1, object1_fake, action, object2, rng,
                 vovab_length=4000, wordDim=100, trainedWordsVectors=None):
        # Embedding layer: each input is a vector of word indices and the
        # output is the average of their word vectors, as in Ding et al. 2014
        self.EMBD = EmbeddingLayer(vovab_length, wordDim, rng=rng,
                                   embedding_w=trainedWordsVectors)
        object1_vector, _ = self.EMBD.words_ind_2vec(object1)
        action_vector, _ = self.EMBD.words_ind_2vec(action)
        object2_vector, _ = self.EMBD.words_ind_2vec(object2)
        object1_vector_fake, _ = self.EMBD.words_ind_2vec(object1_fake)
        self.output = [object1_vector, object1_vector_fake,
                       action_vector, object2_vector]
        self.params = self.EMBD.params

    def get_params(self):
        trainParams = {
            "WordWvec": self.EMBD.embedding_w.get_value()
        }
        return trainParams
class ModelBody(object):
def __init__(self, vectorObjects, rng, n_out, n_in,
trainedModelParams=None):
if trainedModelParams is None:
trainedModelParams = {
"roleDependentLayer1_": {
"T": None, "W1": None, "W2": None, "b": None
},
"roleDependentLayer2_": {
"T": None, "W1": None, "W2": None, "b": None
},
"roleDependentLayer3_": {
"T": None, "W1": None, "W2": None, "b": None
}
}
Obj1, Ob1_fake, Act, Obj2 = vectorObjects
self.RoleDepen1 = RoleDependentLayer(
left_dependent=T.stack([Obj1, Ob1_fake], axis=0),
right_dependent=Act,
n_in=n_in, n_out=n_out, rng=rng,
trainedParams=trainedModelParams,
name="roleDependentLayer1_"
)
self.RoleDepen1_output = self.RoleDepen1.output
self.RoleDepen2 = RoleDependentLayer(
left_dependent=Obj2,
right_dependent=Act,
n_in=n_in, n_out=n_out, rng=rng,
trainedParams=trainedModelParams,
name="roleDependentLayer2_"
)
self.RoleDepen2_output = T.flatten(self.RoleDepen2.output, outdim=1)
self.RoleDepen3 = RoleDependentLayer(
left_dependent=self.RoleDepen1_output,
right_dependent=self.RoleDepen2_output,
n_in=n_out, n_out=n_out, rng=rng,
trainedParams=trainedModelParams,
name="roleDependentLayer3_"
)
self.params = self.RoleDepen1.params + self.RoleDepen2.params + \
self.RoleDepen3.params
self.L2 = (
self.RoleDepen1.L2 +
self.RoleDepen2.L2 +
self.RoleDepen3.L2
)
self.output = self.RoleDepen3.output
def get_params(self):
trainedModelParams = {
"roleDependentLayer1_": self.RoleDepen1.get_params(),
"roleDependentLayer2_": self.RoleDepen2.get_params(),
"roleDependentLayer3_": self.RoleDepen3.get_params()
}
return(trainedModelParams)
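# ModelBody composes three role-dependent (neural tensor) layers:
#   R1 = f([object1; object1_fake], action), R2 = f(object2, action), U = f(R1, R2),
# where U is the final event embedding of size n_out (num_K). Because the first
# layer is fed both the real and the corrupted object1, its output (and
# everything downstream) carries two rows: row 0 = real event, row 1 = fake.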
class LogisticRegression(object):
def __init__(self, rng, layerInput, n_in, n_out,
paramsLayer=None,
name="LogisticRegression_"):
self.layerInput = layerInput
if paramsLayer is None:
self.W = createShareVar(rng=rng, name=name+"W",
factor_for_init=n_out + n_in,
dim=(n_in, n_out))
else:
self.W = theano.shared(value=paramsLayer["W"],
name=name+"W", borrow=True)
if paramsLayer is None:
b_values = np.zeros((n_out,), dtype=theano.config.floatX)
self.b = theano.shared(value=b_values,
name=name+"b", borrow=True)
else:
self.b = theano.shared(value=paramsLayer["b"],
name=name+"b", borrow=True)
step1 = T.dot(self.layerInput, self.W)
self.prob_givenX = T.tanh(step1 + self.b)
self.y_predict = T.argmax(self.prob_givenX, axis=1)
self.params = [self.W, self.b]
self.L2 = sum([(param**2).sum() for param in self.params])
def get_params(self):
trainedParams = {
"W": self.W.get_value(), "b": self.b.get_value()
}
return(trainedParams)
def neg_log_likelihood(self, y_true):
y_true = T.cast(y_true, "int32")
log_prob = T.log(self.prob_givenX)
nll = -T.mean(log_prob[T.arange(y_true.shape[0]), y_true])
return nll
def margin_loss(self):
loss = T.max([0, 1 - self.prob_givenX[0, 0] + self.prob_givenX[1, 0]])
return loss
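# margin_loss above implements the pairwise ranking objective
# max(0, 1 - score(real event) + score(corrupted event)); row 0 of
# prob_givenX is the real event and row 1 the event with the fake object1.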
def cal_errors(self, y_true):
if y_true.ndim != self.y_predict.ndim:
raise TypeError(
"y should have the same shape as self.y_pred",
("y_true", y_true.ndim, "y_pred", self.y_predict.ndim)
)
if y_true.dtype.startswith("int"):
return T.mean(T.neq(self.y_predict, y_true))
else:
raise TypeError(
"y_true should have type int ...",
("y_true", y_true.type, "y_pred", self.y_predict.type)
)
def main(dataPath, trainedParamsPath="modelTrained.pickle",
outputVectorPath="resultEmbeding.pickle",
learning_rate=0.005, L2_reg=0.0001,
n_epochs=500, num_K=150, word_dim=150):
# CONSTANT VARIABLES
RNG = np.random.RandomState(220495 + 280295 + 1)
LABEL_NUM = 2
if os.path.isfile(trainedParamsPath):
with open(trainedParamsPath, 'rb') as handle:
trainedParams = pickle.load(handle)
else:
print("No Trained Model, create new")
trainedParams = {
"Input": {"WordWvec": None}, "Body": None, "Output": None
}
OPTIMIZER = ADAM_OPTIMIZER
# INPUT DATA
data_indexed_events = np.load(dataPath, allow_pickle=True)
N_sample = len(data_indexed_events)
# N_sample = 1
all_index = list(set(sum(np.concatenate(data_indexed_events).ravel(), [])))
# all_train_index = list(set(np.hstack(data_indexed_events[0:NNN].flat)))
# Symbolic input variables (word-index vectors) for one event
object1 = T.ivector("object1")
object1_fake = T.ivector("object1_fake")
action = T.ivector("action")
object2 = T.ivector("object2")
constainY = theano.shared(
np.asarray([1, 0], dtype=theano.config.floatX),
borrow=True
)
# WORD EMBEDDING VECTORS
wordsEmbedLayer = Input(
object1=object1, object1_fake=object1_fake,
action=action, object2=object2, rng=RNG,
wordDim=word_dim, vovab_length=len(all_index),
trainedWordsVectors=trainedParams["Input"]["WordWvec"]
)
Obj1, Ob1_fake, Act, Obj2 = wordsEmbedLayer.output
# EVENT EMBEDDING LAYER - THREE ROLE-DEPENDENT LAYERS
eventsEmbedingLayer = ModelBody(
vectorObjects=wordsEmbedLayer.output,
n_out=num_K, n_in=word_dim, rng=RNG,
trainedModelParams=trainedParams["Body"]
)
# CLASSIFY LAYER
predict_layers = LogisticRegression(
layerInput=eventsEmbedingLayer.output,
rng=RNG, n_in=num_K, n_out=1,
paramsLayer=trainedParams["Output"]
)
# COST FUNCTION
COST = (
predict_layers.margin_loss() +
L2_reg * predict_layers.L2 +
L2_reg * eventsEmbedingLayer.L2
)
# GRADIENT CALCULATION and UPDATE
all_params = wordsEmbedLayer.params + \
eventsEmbedingLayer.params + predict_layers.params
print("TRAIN: ", all_params)
UPDATE = OPTIMIZER(COST, all_params, learning_rate=learning_rate)
# TRAIN MODEL
GET_COST = theano.function(
inputs=[object1, object1_fake, action, object2],
outputs=[predict_layers.margin_loss(),
predict_layers.prob_givenX],
)
# TEST = theano.function(
# inputs=[object1, object1_fake, action, object2],
# outputs=eventsEmbedingLayer.RoleDepen2.test,
# on_unused_input='warn'
# )
TRAIN = theano.function(
inputs=[object1, object1_fake, action, object2],
outputs=[predict_layers.margin_loss()],
updates=UPDATE
)
GET_EVENT_VECTOR = theano.function(
inputs=[object1, object1_fake, action, object2],
outputs=[predict_layers.margin_loss(),
eventsEmbedingLayer.output],
)
def generate_fake_object(all_index, RNG, obj):
fake_obj = list(RNG.choice(all_index, len(obj)))
while sorted(fake_obj) == sorted(obj):
print("WRONG faking object 1", obj)
fake_obj = list(RNG.choice(all_index, len(obj)))
return(fake_obj)
def generate_list_object(data_indexed_events, all_index, RNG):
list_fake_object1 = [
generate_fake_object(all_index, RNG, events[0])
for events in data_indexed_events
]
list_real_object = set([
"_".join([str(a) for a in sorted(events[0])])
for events in data_indexed_events
])
wrong = 0
while True:
valid = True
wrong += 1
for i, obj in enumerate(list_fake_object1):
s = "_".join([str(a) for a in sorted(obj)])
if s in list_real_object:
valid = valid and False
list_fake_object1[i] = \
generate_fake_object(all_index, RNG, obj)
else:
valid = valid and True
if valid:
break
print("There are %d wrong random loops" % wrong)
return(list_fake_object1)
print("*"*72)
print("Begin Training process")
for epoch in range(n_epochs):
# sample corrupted object1 lists (negative examples) for this epoch
print("Begin new epoch: %d" % epoch)
list_fake_object1 = generate_list_object(data_indexed_events,
all_index, RNG)
cost_of_epoch = []
set_index = set(range(N_sample))
temp_variable = N_sample
print("*" * 72+"\n")
print("*" * 72+"\n")
# train
model_train = {
"Input": wordsEmbedLayer.get_params(),
"Body": eventsEmbedingLayer.get_params(),
"Output": predict_layers.get_params()
}
RESULT = {}
outCOST = []
Max_inter = len(set_index)*2
iter_num = 0
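# Training strategy used below (as implemented, not necessarily optimal):
# an event is revisited until its margin loss drops to 0 (it is put back
# into set_index after every update), at which point its event vector is
# cached in RESULT; Max_inter caps the total number of updates per epoch.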
while len(set_index) > 0 and iter_num <= Max_inter:
iter_num += 1
index = set_index.pop()
ob1_real, act, obj2 = data_indexed_events[index]
ob1_fake = list_fake_object1[index]
cost, probY = GET_COST(ob1_real, ob1_fake, act, obj2)
outCOST.append(cost)
# test = TEST(ob1_real, ob1_fake, act, obj2)
# for a in test:
# print(a, a.shape)
if cost > 0:
set_index.add(index)
c = TRAIN(ob1_real, ob1_fake, act, obj2)
else:
RESULT[index] = GET_EVENT_VECTOR(ob1_real, ob1_fake, act, obj2)
if (len(set_index) % 50 == 0 and
temp_variable != len(set_index)):
temp_variable = len(set_index)
print("There are %f %% left in this %d "
"epoch with average cost %f"
% (len(set_index)/float(N_sample)*100,
epoch, np.mean(outCOST[-50:])))
if iter_num > Max_inter - 5:
print(set_index, ob1_real, ob1_fake, act, obj2)
with open(trainedParamsPath, 'wb') as handle:
pickle.dump(model_train, handle,
protocol=pickle.HIGHEST_PROTOCOL)
with open(outputVectorPath, 'wb') as handle:
pickle.dump(RESULT, handle,
protocol=pickle.HIGHEST_PROTOCOL)
if __name__ == "__main__":
# arg = ["", "Data/Query_Apple/2005-2010/IndexedEvents.npy",
# "Data/Query_Apple/2005-2010/linhtinh/", "20"]
arg = sys.argv
main(dataPath="../../Thesis_data/IndexedEvents.npy", trainedParamsPath="TrainedParams.pickle",
outputVectorPath="resultEmbeding.pickle", n_epochs=20)
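# Expected layout of IndexedEvents.npy (an assumption based on how the array
# is consumed above): an object array of events, each event being a triple
# [object1_indices, action_indices, object2_indices] of word-index lists, as
# produced by the vocabulary-extraction script further down in this commit.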
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 16:13:18 2017
@author: red-sky
"""
import theano
import numpy as np
import theano.tensor as T
from SmallUtils import createShareVar
class RoleDependentLayer(object):
def __init__(self, left_dependent, right_dependent, rng,
n_in=100, n_out=4, trainedParams=None,
name="RoleDependentEmbedding_"):
if trainedParams is None:
trainedParams = {
name: {
"T": None, "W1": None, "W2": None, "b": None
}
}
if trainedParams[name]["T"] is not None:
assert trainedParams[name]["T"].shape == (n_out, n_in, n_in)
self.T = theano.shared(value=trainedParams[name]["T"],
name=name+"T", borrow=True)
else:
self.T = createShareVar(rng=rng, name=name+"T",
factor_for_init=n_out + n_in,
dim=(n_out, n_in, n_in))
if trainedParams[name]["W1"] is not None:
assert trainedParams[name]["W1"].shape == (n_in, n_out)
self.W1 = theano.shared(value=trainedParams[name]["W1"],
name=name+"W1", borrow=True)
else:
self.W1 = createShareVar(rng=rng, name=name+"W1",
factor_for_init=n_out + n_in,
dim=(n_in, n_out))
if trainedParams[name]["W2"] is not None:
assert trainedParams[name]["W2"].shape == (n_in, n_out)
self.W2 = theano.shared(value=trainedParams[name]["W2"],
name=name+"W2", borrow=True)
else:
self.W2 = createShareVar(rng=rng, name=name+"W2",
factor_for_init=n_out + n_in,
dim=(n_in, n_out))
if trainedParams[name]["b"] is not None:
assert trainedParams[name]["b"].shape == (n_out,)
self.b = theano.shared(value=trainedParams[name]["b"],
name=name+"b", borrow=True)
else:
b_values = np.zeros(shape=(n_out,), dtype=theano.config.floatX)
self.b = theano.shared(value=b_values, name=name+"b", borrow=True)
# list of layer params
self.params = [self.T, self.W1, self.W2, self.b]
# L2 regulation
self.L2 = sum([(param**2).sum() for param in self.params])
# Bi-linear step
def one_kernel(Tk, left, right):
# bilinear form left . T[k] . right for one slice of the tensor
first_bilinear = theano.dot(left, Tk)
second_bilinear = theano.dot(first_bilinear, right)
return(second_bilinear.flatten())
bi_1, _ = theano.scan(
fn=one_kernel,
sequences=[self.T],
non_sequences=[left_dependent, right_dependent],
n_steps=n_out
)
# Feed forward network step
feedforward_step1 = theano.dot(left_dependent, self.W1)
feedforward_step2 = theano.dot(right_dependent, self.W2)
feedforward_step3 = (feedforward_step1 +
feedforward_step2.dimshuffle("x", 0) +
self.b.dimshuffle("x", 0))
feedforward_step4 = bi_1.dimshuffle(1, 0) + feedforward_step3
self.output = theano.tensor.tanh(feedforward_step4)
self.test = [feedforward_step3]
def output_(self, left_dependent, right_dependent):
def one_kernel(Tk, left, right):
# bilinear form left . T[k] . right for one slice of the tensor
first_bilinear = theano.dot(left, Tk)
second_bilinear = theano.dot(first_bilinear, right)
return(second_bilinear.flatten())
bi_linear_tensor, _ = theano.scan(
fn=one_kernel,
sequences=[self.T],
non_sequences=[left_dependent, right_dependent],
n_steps=n_out
)
bi_linear_tensor = bi_linear_tensor.dimshuffle(1, 0)
feedforward_step1 = theano.dot(left_dependent, self.W1)
feedforward_step2 = theano.dot(right_dependent, self.W2)
feedforward_step3 = (feedforward_step1 +
feedforward_step2.dimshuffle("x", 0) +
self.b.dimshuffle("x", 0))
feedforward_step4 = bi_linear_tensor + feedforward_step3
output = theano.tensor.tanh(feedforward_step4)
return(output)
def get_params(self):
trainedParams = {
"T": self.T.get_value(), "W1": self.W1.get_value(),
"W2": self.W2.get_value(), "b": self.b.get_value()
}
return(trainedParams)
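# For reference, each role-dependent layer above computes, per output unit k,
#   output_k = tanh( left^T . T[k] . right + (left . W1)_k + (right . W2)_k + b_k )
# i.e. a bilinear tensor term plus a standard feed-forward term, matching the
# neural tensor network used for event embedding in Ding et al. (2014).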
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 15:55:14 2017
@author: red-sky
"""
import theano
import theano.tensor as T
import numpy as np
def createShareVar(rng, dim, name, factor_for_init):
var_values = np.asarray(
rng.uniform(
low=-np.sqrt(6.0 / factor_for_init),
high=np.sqrt(6.0 / factor_for_init),
size=dim,
)
)
Var = theano.shared(value=var_values, name=name, borrow=True)
return Var
def adadelta(lr, tparams, cost, grads, listInput):
"""
An adaptive learning rate optimizer
Parameters
----------
lr : Theano SharedVariable
Initial learning rate
tparams: list of Theano SharedVariables
Model parameters
grads: Theano variable
Gradients of cost w.r.t. the parameters
cost: Theano variable
Objective function to minimize
Notes
-----
For more information, see [ADADELTA]_.
.. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
Rate Method*, arXiv:1212.5701.
"""
np_float = np.asarray(0., dtype=theano.config.floatX)
zipped_grads = [theano.shared(p.get_value() * np_float,
name='%s_grad' % k)
for k, p in enumerate(tparams)]
running_up2 = [theano.shared(p.get_value() * np_float,
name='%s_rup2' % k)
for k, p in enumerate(tparams)]
running_grads2 = [theano.shared(p.get_value() * np_float,
name='%s_rgrad2' % k)
for k, p in enumerate(tparams)]
zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
for rg2, g in zip(running_grads2, grads)]
f_grad_shared = theano.function(inputs=listInput,
outputs=cost,
updates=zgup + rg2up,
name='adadelta_f_grad_shared')
updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
for zg, ru2, rg2 in zip(zipped_grads,
running_up2,
running_grads2)]
ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
for ru2, ud in zip(running_up2, updir)]
param_up = [(p, p + ud) for p, ud in zip(tparams, updir)]
f_update = theano.function([lr], [], updates=ru2up + param_up,
on_unused_input='ignore',
name='adadelta_f_update')
return f_grad_shared, f_update
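# adadelta is kept here for completeness but is not used by the event-embedding
# training script. A hypothetical usage sketch (names are illustrative only):
#   lr = T.scalar("lr")
#   f_grad_shared, f_update = adadelta(lr, params, cost,
#                                      theano.grad(cost, params), inputs)
#   c = f_grad_shared(*batch)   # forward pass + accumulate gradient statistics
#   f_update(1.0)               # apply the ADADELTA parameter update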
def ADAM_OPTIMIZER(loss, all_params, learning_rate=0.001,
b1=0.9, b2=0.999, e=1e-8, gamma=1-1e-8):
"""
CITE: http://sebastianruder.com/optimizing-gradient-descent/index.html#adam
ADAM update rules
Default values are taken from [Kingma2014]
References:
[Kingma2014] Kingma, Diederik, and Jimmy Ba.
"Adam: A Method for Stochastic Optimization."
arXiv preprint arXiv:1412.6980 (2014).
http://arxiv.org/pdf/1412.6980v4.pdf
"""
updates = []
all_grads = theano.grad(loss, all_params)
alpha = learning_rate
t = theano.shared(np.float32(1))
# (Decay the first moment running average coefficient)
b1_t = b1*gamma**(t-1)
for params_previous, g in zip(all_params, all_grads):
init_moment = np.zeros(params_previous.get_value().shape,
dtype=theano.config.floatX)
# (the mean)
first_moment = theano.shared(init_moment)
# (the uncentered variance)
second_moment = theano.shared(init_moment)
# (Update biased first moment estimate)
bias_m = b1_t*first_moment + (1 - b1_t)*g
# (Update biased second raw moment estimate)
bias_v = b2*second_moment + (1 - b2)*g**2
# (Compute bias-corrected first moment estimate)
unbias_m = bias_m / (1-b1**t)
# (Compute bias-corrected second raw moment estimate)
unbias_v = bias_v / (1-b2**t)
# (Update parameters)
update_term = (alpha * unbias_m) / (T.sqrt(unbias_v) + e)
params_new = params_previous - update_term
updates.append((first_moment, bias_m))
updates.append((second_moment, bias_v))
updates.append((params_previous, params_new))
updates.append((t, t + 1.))
return updates
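# ADAM_OPTIMIZER returns a plain list of (shared_variable, new_value) pairs,
# which is passed directly as the "updates" argument of theano.function --
# this is how the TRAIN function in the event-embedding script uses it.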
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 17:01:36 2017
@author: red-sky
"""
import sys
import numpy as np
np.random.seed(280295)
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import regularizers, optimizers
def recall(y_true, y_pred):
"""Recall metric.
Only computes a batch-wise average of recall.
Computes the recall, a metric for multi-label classification of
how many relevant items are selected.
"""
true_positives = K.sum(K.round(K.clip(y_true[:, 1] * y_pred[:, 1], 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true[:, 1], 0, 1)))
recall = true_positives / (possible_positives + K.epsilon())
return recall
def precision(y_true, y_pred):
"""Precision metric.
Only computes a batch-wise average of precision.
Computes the precision, a metric for multi-label classification of
how many selected items are relevant.
"""
true_positives = K.sum(K.round(K.clip(y_true[:, 1] * y_pred[:, 1], 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred[:, 1], 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
return precision
def fbeta_score(y_true, y_pred):
# If there are no true positives, fix the F score at 0 like sklearn.
if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
return 0
p = precision(y_true, y_pred)
r = recall(y_true, y_pred)
bb = 1 ** 2
fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
return fbeta_score
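# Small worked example of these batch-wise metrics (illustrative numbers only):
# with y_true = [[0,1],[0,1],[1,0]] and y_pred = [[0.2,0.8],[0.9,0.1],[0.3,0.7]],
# column 1 gives true_positives = 1, possible_positives = 2 and
# predicted_positives = 2, so recall = precision = 0.5 and fbeta_score (beta=1) = 0.5.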
def main(dataX_path, dataY_path, result_path,
n_epoch, input_dim, days):
# load data
np.random.seed(2204)
X = np.load(dataX_path)
Y = np.load(dataY_path)
# build Model
model = Sequential()
model.add(Conv1D(128, 1, activation='relu', input_shape=(days, input_dim)))
model.add(Conv1D(128, 3, activation='relu', padding='same'))
model.add(MaxPooling1D(2))
model.add(Flatten())
model.add(Dropout(0.8))
model.add(Dense(2, activation='softmax'))
adam = optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy',
optimizer=adam,
metrics=['accuracy', recall, precision, fbeta_score])
# model Compile
model_name = result_path+'model2_price_move_predict.hdf5'
checkpointer = ModelCheckpoint(filepath=model_name,
monitor='val_fbeta_score',
verbose=2, save_best_only=True)
earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=2)
outmodel = open(result_path+'model2_price_move_predict.json', 'w')
outmodel.write(model.to_json())
outmodel.close()
# process Training
model.fit(X, Y, batch_size=32, verbose=2,
validation_split=0.1, epochs=n_epoch,
callbacks=[checkpointer])
if __name__ == "__main__":
dataX = sys.argv[1]
dataY = sys.argv[2]
model_path = sys.argv[3]
n_epoch = int(sys.argv[4])
input_dim = int(sys.argv[5])
days = int(sys.argv[6])
main(dataX, dataY, model_path, n_epoch, input_dim, days)
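# Assumed invocation of this CNN training script (file names are placeholders,
# not taken from this commit):
#   python3 cnn_model.py DailyVector5.npy DailyReturn5.npy ./results/ 200 150 5
# i.e. dataX, dataY, result directory, number of epochs, event-vector dimension,
# and the number of past days per sample.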
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 17:01:36 2017
@author: red-sky
"""
import sys
import numpy as np
np.random.seed(280295)
import keras.backend as K
from keras.models import Sequential
from keras import regularizers, optimizers
from keras.layers import Dense, Activation, LSTM, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping
def recall(y_true, y_pred):
"""Recall metric.
Only computes a batch-wise average of recall.
Computes the recall, a metric for multi-label classification of
how many relevant items are selected.
"""
true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true[:, 0], 0, 1)))
recall = true_positives / (possible_positives + K.epsilon())
return recall
def precision(y_true, y_pred):
"""Precision metric.
Only computes a batch-wise average of precision.
Computes the precision, a metric for multi-label classification of
how many selected items are relevant.
"""
true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred[:, 0], 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
return precision
def fbeta_score(y_true, y_pred):
# If there are no true positives, fix the F score at 0 like sklearn.
if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
return 0
p = precision(y_true, y_pred)
r = recall(y_true, y_pred)
bb = 1 ** 2
fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
return fbeta_score
def main(dataX_path, dataY_path, result_path,
n_epoch, input_dim, days):
# load data
np.random.seed(2204)
X = np.load(dataX_path)
Y = np.load(dataY_path)
# build Model
model = Sequential()
model.add(LSTM(256, input_shape=(days, input_dim),
kernel_regularizer=regularizers.l2(0.001)))
model.add(Dropout(0.6))
model.add(Dense(2, activation='softmax',
kernel_regularizer=regularizers.l2(0.001)))
adam = optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy',
optimizer=adam,
metrics=['accuracy', recall, precision, fbeta_score])
# model Compile
model_name = result_path+'model2_price_move_predict.hdf5'
checkpointer = ModelCheckpoint(filepath=model_name,
monitor='val_fbeta_score', mode="max",
verbose=2, save_best_only=True)
earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=2)
outmodel = open(result_path+'model2_price_move_predict.json', 'w')
outmodel.write(model.to_json())
outmodel.close()
# process Training
model.fit(X, Y, batch_size=32, verbose=2,
validation_split=0.1, epochs=n_epoch,
callbacks=[checkpointer])
if __name__ == "__main__":
dataX = sys.argv[1]
dataY = sys.argv[2]
model_path = sys.argv[3]
n_epoch = int(sys.argv[4])
input_dim = int(sys.argv[5])
days = int(sys.argv[6])
main(dataX, dataY, model_path, n_epoch, input_dim, days)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 17:01:36 2017
@author: red-sky
"""
import sys
import numpy as np
np.random.seed(280295)
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import regularizers, optimizers
def recall(y_true, y_pred):
"""Recall metric.
Only computes a batch-wise average of recall.
Computes the recall, a metric for multi-label classification of
how many relevant items are selected.
"""
true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
possible_positives = K.sum(K.round(K.clip(y_true[:, 0], 0, 1)))
recall = true_positives / (possible_positives + K.epsilon())
return recall
def precision(y_true, y_pred):
"""Precision metric.
Only computes a batch-wise average of precision.
Computes the precision, a metric for multi-label classification of
how many selected items are relevant.
"""
true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
predicted_positives = K.sum(K.round(K.clip(y_pred[:, 0], 0, 1)))
precision = true_positives / (predicted_positives + K.epsilon())
return precision
def fbeta_score(y_true, y_pred):
# If there are no true positives, fix the F score at 0 like sklearn.
if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
return 0
p = precision(y_true, y_pred)
r = recall(y_true, y_pred)
bb = 1 ** 2
fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
return fbeta_score
def main(dataX_path, dataY_path, result_path,
n_epoch, input_dim, days):
# load data
np.random.seed(2204)
X = np.load(dataX_path)
Y = np.load(dataY_path)
# build Model
model = Sequential()
model.add(Flatten(input_shape=(days, input_dim)))
model.add(Dense(512, activation='sigmoid'))
model.add(Dropout(0.8))
model.add(Dense(1024, activation='sigmoid'))
model.add(Dropout(0.8))
# model.add(Dense(1024, activation='sigmoid'))
model.add(Dropout(0.8))
model.add(Dense(2, activation='softmax'))
adam = optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy',
optimizer=adam,
metrics=['accuracy', recall, precision, fbeta_score])
# model Compile
model_name = result_path+'model2_price_move_predict.hdf5'
checkpointer = ModelCheckpoint(filepath=model_name, monitor='val_acc',
verbose=2, save_best_only=True)
earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=2)
outmodel = open(result_path+'model2_price_move_predict.json', 'w')
outmodel.write(model.to_json())
outmodel.close()
# process Training
model.fit(X, Y, batch_size=32, verbose=2,
validation_split=0.1, epochs=n_epoch,
callbacks=[checkpointer])
if __name__ == "__main__":
dataX = sys.argv[1]
dataY = sys.argv[2]
model_path = sys.argv[3]
n_epoch = int(sys.argv[4])
input_dim = int(sys.argv[5])
days = int(sys.argv[6])
main(dataX, dataY, model_path, n_epoch, input_dim, days)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 16 21:57:57 2017
@author: red-sky
"""
import bs4
import json
import sys
import urllib.request as urlreq
from bs4 import BeautifulSoup
import requests
BLOOMBERG_params = {
"sort_by_newest": "time:desc",
"sort_by_oldest": "time:asc",
"source_from_bloomberg": "sites=bview",
"end_time": "2017-03-12T15:20:16.240Z"
}
DATA_TO_EXTRACT = {
"query_list_news": ["div", {"class": "search-result-story__container"}],
"query_headline": ["h1", {"class": "search-result-story__headline"}],
"query_time_published": ["time", {"class": "published-at"}],
"query_body": ["div", {"class": "search-result-story__body"}]
}
def parser_url(query_string, page,
sort_by="sort_by_oldest",
source="source_from_bloomberg"):
url = "https://www.bloomberg.com/"
# add search query
url = url + "search?query=" + query_string + "&"
# add sort order
url = url + "sort=" + BLOOMBERG_params[sort_by] + "&"
# restrict results to Bloomberg articles
# (the parameter value already contains the "sites=" key)
url = url + BLOOMBERG_params[source] + "&"
# add page number
url = url + "page=" + str(page)
return url
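# Example of a URL produced by parser_url (the query string is illustrative):
#   https://www.bloomberg.com/search?query=Apple&sort=time:asc&sites=bview&page=1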
def get_rid_off_key(list_contents):
body_string = ""
for substring in list_contents:
if (type(substring) == bs4.element.Tag):
# join all body string and
# eliminate highlight query string key
body_string += substring.string
else:
if (type(substring.string) == bs4.element.NavigableString):
body_string += substring.string
return(body_string)
def extract_from_url(url):
try:
with requests.get(url) as response:
# requests exposes the response payload via .content (there is no .read())
html_of_page = response.content
soup_object = BeautifulSoup(html_of_page, "lxml")
# Extract list of news in soup object
param_to_find = DATA_TO_EXTRACT["query_list_news"]
list_of_news = soup_object.find_all(param_to_find[0],
attrs=param_to_find[1])
if (len(list_of_news) == 0):
return None
# create list result extracted
result = []
for block_new in list_of_news:
# extract time from block
param_to_find = DATA_TO_EXTRACT["query_time_published"]
time = block_new.find_all(param_to_find[0],
attrs=param_to_find[1])
time = time[0]["datetime"]
# extract new headline
param_to_find = DATA_TO_EXTRACT["query_headline"]
headline = block_new.find_all(param_to_find[0],
attrs=param_to_find[1])
headline = get_rid_off_key(headline[0].a.contents)
# extract new body list if string
param_to_find = DATA_TO_EXTRACT["query_body"]
body = block_new.find_all(param_to_find[0],
attrs=param_to_find[1])
print(body)
body_string = get_rid_off_key(body[0].contents)
extracted_from_block = {"time": time,
"headline": headline,
"body": body_string}
# for debug :
# print("\t".join(extracted_from_block))
if len(body_string) >= 5:
result.append(extracted_from_block)
except Exception as inst:
print("Something whenwrong :)", inst)
print("ULR: ", url)
result = []
return(result)
def Query(key, max_page=5000):
# Init page and loop until the site stops returning results
page = 1
all_result_query = []
error = 0
while page < max_page:
print("Collected: %d articles" % len(all_result_query))
new_url = parser_url(key, page)
result = extract_from_url(new_url)
if result is None:
# no news block on this page: assume the query is exhausted
break
all_result_query += result
if len(result) > 0 or error > 10:
page += 1
error = 0
else:
# empty result (e.g. request error): retry this page a few times
error += 1
return(all_result_query)
if __name__ == "__main__":
print("Begin query information about: ", sys.argv[1])
print("Then will save result in: ", sys.argv[2])
News = Query(sys.argv[1], int(sys.argv[4]))
file_name1 = sys.argv[2]
with open(file_name1, "w") as W:
json.dump(News, W, indent=1)
file_name2 = sys.argv[3]
with open(file_name2, "w") as W:
W.write("\n".join([new["body"] for new in News]))
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 17:52:11 2017
@author: red-sky
"""
import sys
import json
import numpy as np
def updateDict(words, dictUp):
# update the word-count dictionary "dictUp" with the given list of "words"
for w in words:
if w in dictUp:
dictUp[w] += 1
else:
# count the first occurrence as 1 so the stored value is the true frequency
dictUp[w] = 1
return dictUp
def extractVocab(eventsFile, fromIndex=0, toIndex=-1):
# from the events file, collect word counts and build word/index mappings
vocab = dict()
with open(eventsFile, "r") as file:
list_events = file.read().strip().splitlines()
if toIndex == -1:
list_events = list_events[fromIndex:]
else:
list_events = sorted(set(list_events[fromIndex:toIndex]))
for i, event in enumerate(list_events):
if event[0] != "\t":
index = i
break
list_events = list_events[index:]
for event in list_events:
event = event.split("\t")
words = event[1].split(" ") + \
event[2].split(" ") + \
event[3].split(" ")
vocab = updateDict(words, vocab)
vocab_words = vocab.keys()
support_words = ["NOISEWORDS"]
vocab_words = support_words + \
sorted(vocab_words, key=lambda x: vocab[x], reverse=True)
IndexWords = range(len(vocab_words))
Count = ["NOISEWORDS"] + [vocab[w] for w in vocab_words[1:]]
result = [dict(zip(vocab_words, Count)),
dict(zip(IndexWords, vocab_words)),
dict(zip(vocab_words, IndexWords))]
return result, list_events
def convertEvent(eventsFile, vocabMapping, countMin=5):
# convert all Events to index for training
wordCount, _, word2index = vocabMapping
Events = []
with open(eventsFile, "r") as file:
list_events = file.read().strip().splitlines()
for event in list_events:
event = event.split("\t")
list_obj = [event[1].split(" "),
event[2].split(" "),
event[3].split(" ")]
# Keep only words that occur at least countMin times; rarer words map to index 0 (NOISEWORDS)
wordsIndexed = []
for obj in list_obj:
objIndex = []
for w in obj:
if wordCount[w] >= countMin:
objIndex.append(word2index[w])
else:
objIndex.append(0)
wordsIndexed.append(objIndex)
Events.append(wordsIndexed)
return Events
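# Assumed format of one line in the events file, inferred from the indexing
# above (tab-separated; field 0 is a date/time stamp, lines starting with a
# tab are skipped by extractVocab):
#   <time>\t<object1 words>\t<action words>\t<object2 words>\t...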
if __name__ == "__main__":
# in
EventPath = "../../Thesis_data/Apple_query_result_body.txt"
fromIndex = 0
toIndex = -1
minCountWord = 5
# out
EventNewPath = "./Events_for_training.txt"
VocabPath = "./Vocab_in_events_for_training.json"
IndexdEventPath = "./IndexedEvents_for_training.npy"
vocabMapping, EventNew = extractVocab(EventPath, fromIndex, toIndex)
with open(VocabPath, "w") as W:
json.dump(vocabMapping, W, indent=2)
with open(EventNewPath, "w") as W:
W.write("\n".join(EventNew))
indexed_events = convertEvent(EventNewPath, vocabMapping, minCountWord)
np.save(arr=np.array(indexed_events), file=IndexdEventPath)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 11:58:54 2017
@author: red-sky
"""
import sys
import json
def findDate(news_body, list_news):
date = ""
for ind, new in enumerate(list_news):
if news_body in new["body"]:
date = new["time"]
break
return date
def extractAllDate(list_events, list_news, choosedInfor=[1, 2, 3, 0, 6]):
list_result = []
N = len(list_events)
i = 0.0
for event in list_events:
i += 1
if i % 1000 == 0:
print("Done %f percents" % (i/N*100))
date = [findDate(event[6], list_news)]
infor = date + [event[i] for i in choosedInfor]
list_result.append(infor)
return list_result
if __name__ == "__main__":
events = open(sys.argv[1], "r").read().strip().splitlines()
events = [event.split("\t") for event in events
if len(event.split("\t")) > 5]
news = json.load(open(sys.argv[2], "r"))
result = extractAllDate(events, news)
with open(sys.argv[3], "w") as W:
for line in result[1:]:
W.write("\t".join(line)+"\n")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 16:57:11 2017
@author: red-sky
"""
import sys
import numpy as np
import pickle
import pandas as pd
def main(VectorsPath, EventPath, StockPricePath, days):
with open(VectorsPath, "rb") as H:
Vec = pickle.load(H)
Vectors = np.array([list(b[0]) for a, b in Vec.values()])
# Vectors = np.load(VectorsPath)
with open(EventPath, "r") as H:
F = np.array([a.split("\t")[0:4] for a in H.read().splitlines()])
D = {}
for date, vec in zip(F[:, 0], Vectors):
if date[:10] in D:
D[date[:10]].append(vec)
else:
D[date[:10]] = [vec]
D2 = {}
for date in sorted(D.keys()):
D2[date] = np.mean(D[date], 0)
Dates = np.array(sorted(D2.keys()))
SampleIndex = [list(range(i-days, i)) for i in range(days, len(Dates))]
DataX = []
DateX = []
for listIndex in SampleIndex:
DataX.append([D2[date] for date in Dates[listIndex]])
DateX.append(Dates[listIndex[-1]])
Df = pd.read_csv(StockPricePath)
LabelY = []
DataX_yesData = []
for i, date in enumerate(DateX):
retu = list(Df.loc[Df["Date"] == date]["ReturnOpen"])
print(retu)
if len(retu) > 0:
retu = float(retu[0])*100
if retu > 0:
# positive return -> class [1, 0]
LabelY.append([1, 0])
else:
# zero or negative return -> class [0, 1]
LabelY.append([0, 1])
DataX_yesData.append(list(DataX[i]))
print(date)
dataX = np.array(DataX_yesData)
dataY = np.array(LabelY)
print("DataX:", dataX.shape)
print("DataY:", dataY.shape, np.sum(dataY, 0) / np.sum(dataY))
return (dataX, dataY)
if __name__ == "__main__":
VectorsPath = sys.argv[1]
EventPath = sys.argv[2]
StockPricePath = sys.argv[3]
days = int(sys.argv[5])
DataX, LabelY = main(VectorsPath, EventPath, StockPricePath, days)
DataPath = sys.argv[4]
np.save(arr=DataX, file=DataPath+"/DailyVector" + sys.argv[5] + ".npy")
np.save(arr=LabelY, file=DataPath+"/DailyReturn" + sys.argv[5] + ".npy")
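# Assumed invocation (paths other than resultEmbeding.pickle are placeholders):
#   python3 create_daily_data.py resultEmbeding.pickle Events_with_date.txt \
#       stock_price.csv ./Data 5
# where the CSV must contain "Date" and "ReturnOpen" columns; this writes
# ./Data/DailyVector5.npy and ./Data/DailyReturn5.npy for the prediction
# models above.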