Showing 41 changed files with 984 additions and 0 deletions
소스코드/Bert_CNN_English.ipynb
0 → 100644
This diff could not be displayed because it is too large.
소스코드/Korean pre processing.ipynb
0 → 100644
This diff is collapsed.
소스코드/data/apple_combined_data.csv
0 → 100644
This diff could not be displayed because it is too large.
소스코드/data/apple_combined_data2015 .csv
0 → 100644
This diff could not be displayed because it is too large.
소스코드/event embedding/Data/EventNew.txt
0 → 100644
This diff could not be displayed because it is too large.
소스코드/event embedding/Data/IndexedEvents.npy
0 → 100644
No preview for this file type
1 | +{"class_name": "Sequential", "config": [{"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true, "batch_input_shape": [null, 5, 80], "dtype": "float32"}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 512, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.8}}, {"class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "units": 1024, "activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dropout", "config": {"name": "dropout_2", "trainable": true, "rate": 0.8}}, {"class_name": "Dropout", "config": {"name": "dropout_3", "trainable": true, "rate": 0.8}}, {"class_name": "Dense", "config": {"name": "dense_3", "trainable": true, "units": 2, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}], "keras_version": "2.0.2", "backend": "theano"} | ||
1 | +{"class_name": "Sequential", "config": [{"class_name": "Conv1D", "config": {"name": "conv1d_1", "trainable": true, "batch_input_shape": [null, 5, 80], "dtype": "float32", "filters": 128, "kernel_size": [1], "strides": [1], "padding": "valid", "dilation_rate": [1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Conv1D", "config": {"name": "conv1d_2", "trainable": true, "filters": 128, "kernel_size": [3], "strides": [1], "padding": "same", "dilation_rate": [1], "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "MaxPooling1D", "config": {"name": "max_pooling1d_1", "trainable": true, "strides": [2], "pool_size": [2], "padding": "valid"}}, {"class_name": "Flatten", "config": {"name": "flatten_1", "trainable": true}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.8}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 2, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}], "keras_version": "2.0.2", "backend": "theano"} | ||
소스코드/event embedding/Data/ModelStock_result/Proposed_Model/model2_price_move_predict.hdf5
0 → 100644
No preview for this file type
소스코드/event embedding/Data/ModelStock_result/Proposed_Model/model2_price_move_predict.json
0 → 100644
1 | +{"class_name": "Sequential", "config": [{"class_name": "LSTM", "config": {"name": "lstm_1", "trainable": true, "batch_input_shape": [null, 5, 80], "dtype": "float32", "return_sequences": false, "go_backwards": false, "stateful": false, "unroll": false, "implementation": 0, "units": 256, "activation": "tanh", "recurrent_activation": "hard_sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "unit_forget_bias": true, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 0.0010000000474974513}}, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0}}, {"class_name": "Dropout", "config": {"name": "dropout_1", "trainable": true, "rate": 0.6}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "units": 2, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "VarianceScaling", "config": {"scale": 1.0, "mode": "fan_avg", "distribution": "uniform", "seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0, "l2": 0.0010000000474974513}}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}], "keras_version": "2.0.2", "backend": "theano"} | ||
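The model .json files above store only the Keras architectures (note "keras_version": "2.0.2", "backend": "theano"); the sibling .hdf5 files hold the checkpointed models. A minimal reloading sketch, assuming a matching Keras 2.x install, is shown below; the toy input shape (n_samples, 5, 80) is taken from the batch_input_shape in the configs.

# Sketch only: reload the saved architecture and weights for inference.
import numpy as np
from keras.models import model_from_json

with open("model2_price_move_predict.json") as f:
    model = model_from_json(f.read())          # rebuild the architecture
# The .hdf5 was written by ModelCheckpoint (full model); depending on the Keras
# version, keras.models.load_model(..., custom_objects={...}) with the custom
# metric functions may be needed instead of load_weights().
model.load_weights("model2_price_move_predict.hdf5")

X = np.random.rand(10, 5, 80)                  # toy batch: 5 days x 80-dim event vectors
print(model.predict(X).argmax(axis=1))         # 0 = up, 1 = down (per the label encoding below)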
소스코드/event embedding/Data/Vocab.txt
0 → 100644
This diff could not be displayed because it is too large.
소스코드/event embedding/Data/data_stock.dat
0 → 100644
This diff is collapsed.
소스코드/event embedding/Data/data_stock_2.dat
0 → 100644
This diff is collapsed.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 23:41:51 2017

@author: red-sky
"""


import numpy as np
import theano
from theano import tensor as T


class EmbeddingLayer(object):
    def __init__(self, num_vocab, word_dim, rng, embedding_w=None):
        '''
        word_dim :: dimension of the word embeddings
        num_vocab :: number of word embeddings in the vocabulary
        embedding_w :: pre-trained word vectors (optional)
        '''

        if embedding_w is None:
            word_vectors = rng.uniform(-1.0, 1.0, (num_vocab, word_dim))
        else:
            word_vectors = embedding_w
        # Cast before wrapping in a shared variable: calling .astype() on the
        # shared variable itself returns a new symbolic variable, so the
        # embedding matrix would no longer be updatable through self.params.
        self.embedding_w = theano.shared(
            np.asarray(word_vectors, dtype=theano.config.floatX),
            name="EmbeddingLayer_W")

        self.params = [self.embedding_w]
        self.infor = [num_vocab, word_dim]

    def words_ind_2vec(self, index):
        # Look up the word vectors of the given indices and average them.
        map_word_vectors = self.embedding_w[index]
        output = T.mean(map_word_vectors, axis=0)
        return output, map_word_vectors


if __name__ == "__main__":
    rng = np.random.RandomState(220495)
    arrWords = T.ivector("words")
    EMBD = EmbeddingLayer(100, 150, rng=rng)
    mean_vec, word_vecs = EMBD.words_ind_2vec(arrWords)
    Word2Vec = theano.function(
        inputs=[arrWords],
        outputs=mean_vec
    )
    Vec = Word2Vec([1, 2, 3, 4])
    Vec = Word2Vec([2, 3, 4])
    print("Dim: ", Vec.shape)
    print("Val: ", Vec)
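words_ind_2vec simply averages the rows of the embedding matrix selected by the word indices; the toy NumPy check below (made-up values, not part of the repository) shows the equivalent computation:

import numpy as np

rng = np.random.RandomState(220495)
W = rng.uniform(-1.0, 1.0, (100, 150))   # same shape as EmbeddingLayer(100, 150)

# words_ind_2vec([2, 3, 4]) returns the mean of rows 2, 3 and 4 of W.
mean_vec = W[[2, 3, 4]].mean(axis=0)
print(mean_vec.shape)                     # (150,)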
This diff is collapsed.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 16:13:18 2017

@author: red-sky
"""

import theano
import numpy as np
import theano.tensor as T
from SmallUtils import createShareVar


class RoleDependentLayer(object):
    def __init__(self, left_dependent, right_dependent, rng,
                 n_in=100, n_out=4, trainedParams=None,
                 name="RoleDependentEmbedding_"):
        if trainedParams is None:
            trainedParams = {
                name: {
                    "T": None, "W1": None, "W2": None, "b": None
                }
            }

        if trainedParams[name]["T"] is not None:
            assert trainedParams[name]["T"].shape == (n_out, n_in, n_in)
            self.T = theano.shared(value=trainedParams[name]["T"],
                                   name=name+"T", borrow=True)
        else:
            self.T = createShareVar(rng=rng, name=name+"T",
                                    factor_for_init=n_out + n_in,
                                    dim=(n_out, n_in, n_in))

        if trainedParams[name]["W1"] is not None:
            assert trainedParams[name]["W1"].shape == (n_in, n_out)
            self.W1 = theano.shared(value=trainedParams[name]["W1"],
                                    name=name+"W1", borrow=True)
        else:
            self.W1 = createShareVar(rng=rng, name=name+"W1",
                                     factor_for_init=n_out + n_in,
                                     dim=(n_in, n_out))

        if trainedParams[name]["W2"] is not None:
            assert trainedParams[name]["W2"].shape == (n_in, n_out)
            self.W2 = theano.shared(value=trainedParams[name]["W2"],
                                    name=name+"W2", borrow=True)
        else:
            self.W2 = createShareVar(rng=rng, name=name+"W2",
                                     factor_for_init=n_out + n_in,
                                     dim=(n_in, n_out))

        if trainedParams[name]["b"] is not None:
            assert trainedParams[name]["b"].shape == (n_out,)
            self.b = theano.shared(value=trainedParams[name]["b"],
                                   name=name+"b", borrow=True)
        else:
            b_values = np.zeros(shape=(n_out,), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name=name+"b", borrow=True)

        # list of layer params
        self.params = [self.T, self.W1, self.W2, self.b]
        # keep the output dimension so that output_() can reuse it
        self.n_out = n_out

        # L2 regularization
        self.L2 = sum([(param**2).sum() for param in self.params])

        # Bilinear step: one n_in x n_in slice of the tensor per output unit
        def one_kernel(Tk, left, right):
            first_bilinear = theano.dot(left, Tk)
            second_bilinear = theano.dot(first_bilinear, right)
            return second_bilinear.flatten()

        bi_1, _ = theano.scan(
            fn=one_kernel,
            sequences=[self.T],
            non_sequences=[left_dependent, right_dependent],
            n_steps=n_out
        )

        # Feed-forward step
        feedforward_step1 = theano.dot(left_dependent, self.W1)
        feedforward_step2 = theano.dot(right_dependent, self.W2)
        feedforward_step3 = (feedforward_step1 +
                             feedforward_step2.dimshuffle("x", 0) +
                             self.b.dimshuffle("x", 0))
        feedforward_step4 = bi_1.dimshuffle(1, 0) + feedforward_step3
        self.output = theano.tensor.tanh(feedforward_step4)
        self.test = [feedforward_step3]

    def output_(self, left_dependent, right_dependent):
        # Same computation as in __init__, but for new symbolic inputs.
        def one_kernel(Tk, left, right):
            first_bilinear = theano.dot(left, Tk)
            second_bilinear = theano.dot(first_bilinear, right)
            return second_bilinear.flatten()

        bi_linear_tensor, _ = theano.scan(
            fn=one_kernel,
            sequences=[self.T],
            non_sequences=[left_dependent, right_dependent],
            n_steps=self.n_out  # n_out is not in scope here; use the stored value
        )

        bi_linear_tensor = bi_linear_tensor.dimshuffle(1, 0)
        feedforward_step1 = theano.dot(left_dependent, self.W1)
        feedforward_step2 = theano.dot(right_dependent, self.W2)
        feedforward_step3 = (feedforward_step1 +
                             feedforward_step2.dimshuffle("x", 0) +
                             self.b.dimshuffle("x", 0))
        feedforward_step4 = bi_linear_tensor + feedforward_step3
        output = theano.tensor.tanh(feedforward_step4)
        return output

    def get_params(self):
        trainedParams = {
            "T": self.T.get_value(), "W1": self.W1.get_value(),
            "W2": self.W2.get_value(), "b": self.b.get_value()
        }
        return trainedParams
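RoleDependentLayer implements the bilinear (neural-tensor) composition used for the event embedding: each of the n_out output units k combines its two inputs as left · T[k] · right, plus a standard feed-forward term. A NumPy sketch for a single pair of input vectors (shapes assumed, values made up):

import numpy as np

n_in, n_out = 100, 4
rng = np.random.RandomState(0)
left = rng.rand(n_in)              # e.g. actor embedding
right = rng.rand(n_in)             # e.g. action embedding
Tt = rng.rand(n_out, n_in, n_in)   # one n_in x n_in slice per output unit
W1 = rng.rand(n_in, n_out)
W2 = rng.rand(n_in, n_out)
b = np.zeros(n_out)

bilinear = np.array([left @ Tt[k] @ right for k in range(n_out)])
output = np.tanh(bilinear + left @ W1 + right @ W2 + b)   # shape (n_out,)
print(output.shape)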
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 15:55:14 2017

@author: red-sky
"""
import theano
import theano.tensor as T
import numpy as np


def createShareVar(rng, dim, name, factor_for_init):
    # Glorot-style uniform initialization, cast to the configured float type.
    var_values = np.asarray(
        rng.uniform(
            low=-np.sqrt(6.0 / factor_for_init),
            high=np.sqrt(6.0 / factor_for_init),
            size=dim,
        ),
        dtype=theano.config.floatX
    )
    Var = theano.shared(value=var_values, name=name, borrow=True)
    return Var


def adadelta(lr, tparams, cost, grads, listInput):
    """
    An adaptive learning rate optimizer

    Parameters
    ----------
    lr : Theano SharedVariable
        Initial learning rate
    tparams: list of Theano SharedVariables
        Model parameters
    grads: Theano variables
        Gradients of cost w.r.t. the parameters
    cost: Theano variable
        Objective function to minimize

    Notes
    -----
    For more information, see [ADADELTA]_.

    .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
       Rate Method*, arXiv:1212.5701.
    """
    np_float = np.asarray(0., dtype=theano.config.floatX)
    zipped_grads = [theano.shared(p.get_value() * np_float,
                                  name='%s_grad' % k)
                    for k, p in enumerate(tparams)]
    running_up2 = [theano.shared(p.get_value() * np_float,
                                 name='%s_rup2' % k)
                   for k, p in enumerate(tparams)]
    running_grads2 = [theano.shared(p.get_value() * np_float,
                                    name='%s_rgrad2' % k)
                      for k, p in enumerate(tparams)]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inputs=listInput,
                                    outputs=cost,
                                    updates=zgup + rg2up,
                                    name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads,
                                     running_up2,
                                     running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams, updir)]

    f_update = theano.function([lr], [], updates=ru2up + param_up,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update


def ADAM_OPTIMIZER(loss, all_params, learning_rate=0.001,
                   b1=0.9, b2=0.999, e=1e-8, gamma=1-1e-8):
    """
    CITE: http://sebastianruder.com/optimizing-gradient-descent/index.html#adam
    ADAM update rules
    Default values are taken from [Kingma2014]
    References:
        [Kingma2014] Kingma, Diederik, and Jimmy Ba.
        "Adam: A Method for Stochastic Optimization."
        arXiv preprint arXiv:1412.6980 (2014).
        http://arxiv.org/pdf/1412.6980v4.pdf
    """
    updates = []
    all_grads = theano.grad(loss, all_params)
    alpha = learning_rate
    t = theano.shared(np.float32(1))
    # (Decay the first moment running average coefficient)
    b1_t = b1*gamma**(t-1)

    for params_previous, g in zip(all_params, all_grads):
        init_moment = np.zeros(params_previous.get_value().shape,
                               dtype=theano.config.floatX)
        # (the mean)
        first_moment = theano.shared(init_moment)
        # (the uncentered variance)
        second_moment = theano.shared(init_moment)

        # (Update biased first moment estimate)
        bias_m = b1_t*first_moment + (1 - b1_t)*g

        # (Update biased second raw moment estimate)
        bias_v = b2*second_moment + (1 - b2)*g**2

        # (Compute bias-corrected first moment estimate)
        unbias_m = bias_m / (1-b1**t)

        # (Compute bias-corrected second raw moment estimate)
        unbias_v = bias_v / (1-b2**t)

        # (Update parameters)
        update_term = (alpha * unbias_m) / (T.sqrt(unbias_v) + e)
        params_new = params_previous - update_term

        updates.append((first_moment, bias_m))
        updates.append((second_moment, bias_v))
        updates.append((params_previous, params_new))

    # Increment the timestep once per call, not once per parameter:
    # Theano rejects duplicate updates to the same shared variable.
    updates.append((t, t + 1.))
    return updates
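A minimal sketch of how these helpers could be wired together (hypothetical toy model, not from the repository); the updates list returned by ADAM_OPTIMIZER is passed straight to theano.function, and the module name SmallUtils is taken from the import in RoleDependentLayer above:

import numpy as np
import theano
import theano.tensor as T
from SmallUtils import createShareVar, ADAM_OPTIMIZER

rng = np.random.RandomState(0)
x = T.matrix("x")
y = T.ivector("y")

# Toy softmax regression with a single weight matrix.
W = createShareVar(rng=rng, dim=(10, 2), name="W", factor_for_init=10 + 2)
p_y = T.nnet.softmax(T.dot(x, W))
loss = -T.mean(T.log(p_y)[T.arange(y.shape[0]), y])

updates = ADAM_OPTIMIZER(loss=loss, all_params=[W], learning_rate=0.001)
train_step = theano.function(inputs=[x, y], outputs=loss, updates=updates)

X_batch = np.random.rand(32, 10).astype(theano.config.floatX)
y_batch = np.random.randint(0, 2, size=32).astype("int32")
print(train_step(X_batch, y_batch))  # loss should decrease over repeated calls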
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 17:01:36 2017

@author: red-sky
"""

import sys
import numpy as np
np.random.seed(280295)
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import regularizers, optimizers


def recall(y_true, y_pred):
    """Recall metric.

    Only computes a batch-wise average of recall.

    Computes the recall, a metric for multi-label classification of
    how many relevant items are selected.
    Column 1 of the one-hot labels is treated as the positive class here.
    """
    true_positives = K.sum(K.round(K.clip(y_true[:, 1] * y_pred[:, 1], 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true[:, 1], 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall


def precision(y_true, y_pred):
    """Precision metric.

    Only computes a batch-wise average of precision.

    Computes the precision, a metric for multi-label classification of
    how many selected items are relevant.
    """
    true_positives = K.sum(K.round(K.clip(y_true[:, 1] * y_pred[:, 1], 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred[:, 1], 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision


def fbeta_score(y_true, y_pred):

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = 1 ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score


def main(dataX_path, dataY_path, result_path,
         n_epoch, input_dim, days):

    # load data
    np.random.seed(2204)
    X = np.load(dataX_path)
    Y = np.load(dataY_path)

    # build Model
    model = Sequential()
    model.add(Conv1D(128, 1, activation='relu', input_shape=(days, input_dim)))
    model.add(Conv1D(128, 3, activation='relu', padding='same'))
    model.add(MaxPooling1D(2))
    model.add(Flatten())
    model.add(Dropout(0.8))
    model.add(Dense(2, activation='softmax'))
    adam = optimizers.Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy', recall, precision, fbeta_score])

    # callbacks and model architecture dump
    model_name = result_path+'model2_price_move_predict.hdf5'
    # mode="max" because a higher F-score is better; the default "auto" mode
    # would fall back to minimizing an unrecognized metric name (the LSTM
    # variant below already sets this explicitly).
    checkpointer = ModelCheckpoint(filepath=model_name,
                                   monitor='val_fbeta_score', mode="max",
                                   verbose=2, save_best_only=True)
    # Note: early stopping is created but not passed to fit() below.
    earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=2)

    outmodel = open(result_path+'model2_price_move_predict.json', 'w')
    outmodel.write(model.to_json())
    outmodel.close()

    # process Training
    model.fit(X, Y, batch_size=32, verbose=2,
              validation_split=0.1, epochs=n_epoch,
              callbacks=[checkpointer])


if __name__ == "__main__":
    dataX = sys.argv[1]
    dataY = sys.argv[2]
    model_path = sys.argv[3]
    n_epoch = int(sys.argv[4])
    input_dim = int(sys.argv[5])
    days = int(sys.argv[6])
    main(dataX, dataY, model_path, n_epoch, input_dim, days)
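All three training scripts share the same positional-argument interface. A hypothetical call of main() with placeholder file names, matching the 5-day / 80-dimension setup of the JSON configs above:

# Placeholder file names; DailyVector5.npy / DailyReturn5.npy correspond to the
# outputs of the daily-vector preparation script further below.
main(dataX_path="DailyVector5.npy",      # (N, days, input_dim) event-vector windows
     dataY_path="DailyReturn5.npy",      # (N, 2) one-hot up/down labels
     result_path="./ModelStock_result/", # where the .hdf5 checkpoint and .json go
     n_epoch=200,                        # number of training epochs (assumed)
     input_dim=80,                       # event-embedding dimension
     days=5)                             # look-back window in trading days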
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 17:01:36 2017

@author: red-sky
"""

import sys
import numpy as np
np.random.seed(280295)
import keras.backend as K
from keras.models import Sequential
from keras import regularizers, optimizers
from keras.layers import Dense, Activation, LSTM, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping


def recall(y_true, y_pred):
    """Recall metric.

    Only computes a batch-wise average of recall.

    Computes the recall, a metric for multi-label classification of
    how many relevant items are selected.
    Column 0 of the one-hot labels is treated as the positive class here
    (the CNN variant above uses column 1 instead).
    """
    true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true[:, 0], 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall


def precision(y_true, y_pred):
    """Precision metric.

    Only computes a batch-wise average of precision.

    Computes the precision, a metric for multi-label classification of
    how many selected items are relevant.
    """
    true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred[:, 0], 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision


def fbeta_score(y_true, y_pred):

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = 1 ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score


def main(dataX_path, dataY_path, result_path,
         n_epoch, input_dim, days):

    # load data
    np.random.seed(2204)
    X = np.load(dataX_path)
    Y = np.load(dataY_path)

    # build Model
    model = Sequential()
    model.add(LSTM(256, input_shape=(days, input_dim),
                   kernel_regularizer=regularizers.l2(0.001)))

    model.add(Dropout(0.6))
    model.add(Dense(2, activation='softmax',
                    kernel_regularizer=regularizers.l2(0.001)))
    adam = optimizers.Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy', recall, precision, fbeta_score])

    # callbacks and model architecture dump
    model_name = result_path+'model2_price_move_predict.hdf5'
    checkpointer = ModelCheckpoint(filepath=model_name,
                                   monitor='val_fbeta_score', mode="max",
                                   verbose=2, save_best_only=True)
    # Note: early stopping is created but not passed to fit() below.
    earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=2)

    outmodel = open(result_path+'model2_price_move_predict.json', 'w')
    outmodel.write(model.to_json())
    outmodel.close()

    # process Training
    model.fit(X, Y, batch_size=32, verbose=2,
              validation_split=0.1, epochs=n_epoch,
              callbacks=[checkpointer])


if __name__ == "__main__":
    dataX = sys.argv[1]
    dataY = sys.argv[2]
    model_path = sys.argv[3]
    n_epoch = int(sys.argv[4])
    input_dim = int(sys.argv[5])
    days = int(sys.argv[6])
    main(dataX, dataY, model_path, n_epoch, input_dim, days)
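Since these metrics look at a single column of the one-hot output (column 0 here), a quick batch-wise sanity check with the Keras backend and the recall/precision functions defined above can be useful; the values below are made up:

import numpy as np
import keras.backend as K

# Toy batch of 4 samples, one-hot [class0, class1]; column 0 is the positive class.
y_true = K.variable(np.array([[1, 0], [1, 0], [0, 1], [0, 1]], dtype="float32"))
y_pred = K.variable(np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4], [0.1, 0.9]], dtype="float32"))

# One of the two true positives is recovered, and one prediction is a false positive.
print(K.eval(recall(y_true, y_pred)))     # ~0.5
print(K.eval(precision(y_true, y_pred)))  # ~0.5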
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 17:01:36 2017

@author: red-sky
"""

import sys
import numpy as np
np.random.seed(280295)
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPooling1D
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import regularizers, optimizers


def recall(y_true, y_pred):
    """Recall metric.

    Only computes a batch-wise average of recall.

    Computes the recall, a metric for multi-label classification of
    how many relevant items are selected.
    """
    true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true[:, 0], 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall


def precision(y_true, y_pred):
    """Precision metric.

    Only computes a batch-wise average of precision.

    Computes the precision, a metric for multi-label classification of
    how many selected items are relevant.
    """
    true_positives = K.sum(K.round(K.clip(y_true[:, 0] * y_pred[:, 0], 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred[:, 0], 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision


def fbeta_score(y_true, y_pred):

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = 1 ** 2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score


def main(dataX_path, dataY_path, result_path,
         n_epoch, input_dim, days):

    # load data
    np.random.seed(2204)
    X = np.load(dataX_path)
    Y = np.load(dataY_path)

    # build Model (plain MLP baseline; this matches the first JSON config above,
    # including the two consecutive Dropout layers left by the disabled Dense)
    model = Sequential()
    model.add(Flatten(input_shape=(days, input_dim)))
    model.add(Dense(512, activation='sigmoid'))
    model.add(Dropout(0.8))
    model.add(Dense(1024, activation='sigmoid'))
    model.add(Dropout(0.8))
# model.add(Dense(1024, activation='sigmoid'))
    model.add(Dropout(0.8))
    model.add(Dense(2, activation='softmax'))

    adam = optimizers.Adam(lr=0.001)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy', recall, precision, fbeta_score])

    # callbacks and model architecture dump
    model_name = result_path+'model2_price_move_predict.hdf5'
    checkpointer = ModelCheckpoint(filepath=model_name, monitor='val_acc',
                                   verbose=2, save_best_only=True)
    # Note: early stopping is created but not passed to fit() below.
    earlystopper = EarlyStopping(monitor='val_loss', patience=20, verbose=2)

    outmodel = open(result_path+'model2_price_move_predict.json', 'w')
    outmodel.write(model.to_json())
    outmodel.close()

    # process Training
    model.fit(X, Y, batch_size=32, verbose=2,
              validation_split=0.1, epochs=n_epoch,
              callbacks=[checkpointer])


if __name__ == "__main__":
    dataX = sys.argv[1]
    dataY = sys.argv[2]
    model_path = sys.argv[3]
    n_epoch = int(sys.argv[4])
    input_dim = int(sys.argv[5])
    days = int(sys.argv[6])
    main(dataX, dataY, model_path, n_epoch, input_dim, days)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 16 21:57:57 2017

@author: red-sky
"""

import bs4
import json
import sys
from bs4 import BeautifulSoup
import requests

BLOOMBERG_params = {
    "sort_by_newest": "time:desc",
    "sort_by_oldest": "time:asc",
    "source_from_bloomberg": "sites=bview",
    "end_time": "2017-03-12T15:20:16.240Z"
}

DATA_TO_EXTRACT = {
    "query_list_news": ["div", {"class": "search-result-story__container"}],
    "query_headline": ["h1", {"class": "search-result-story__headline"}],
    "query_time_published": ["time", {"class": "published-at"}],
    "query_body": ["div", {"class": "search-result-story__body"}]
}


def parser_url(query_string, page,
               sort_by="sort_by_oldest",
               source="source_from_bloomberg"):
    url = "https://www.bloomberg.com/"
    # add search query
    url = url + "search?query=" + query_string + "&"
    # add sort order
    url = url + "sort=" + BLOOMBERG_params[sort_by] + "&"
    # add news source filter
    url = url + "sites=" + BLOOMBERG_params[source] + "&"
    # add page number
    url = url + "page=" + str(page)
    return url


def get_rid_off_key(list_contents):
    # Join all body strings and drop the highlight markup around the query terms.
    body_string = ""
    for substring in list_contents:
        if (type(substring) == bs4.element.Tag):
            body_string += substring.string
        else:
            if (type(substring.string) == bs4.element.NavigableString):
                body_string += substring.string
    return body_string


def extract_from_url(url):
    try:
        response = requests.get(url)
        # requests exposes the payload via .content / .text, not .read()
        html_of_page = response.content
        soup_object = BeautifulSoup(html_of_page, "lxml")
        # Extract list of news in soup object
        param_to_find = DATA_TO_EXTRACT["query_list_news"]
        list_of_news = soup_object.find_all(param_to_find[0],
                                            attrs=param_to_find[1])
        if (len(list_of_news) == 0):
            return None
        # create list result extracted
        result = []
        for block_new in list_of_news:
            # extract time from block
            param_to_find = DATA_TO_EXTRACT["query_time_published"]
            time = block_new.find_all(param_to_find[0],
                                      attrs=param_to_find[1])
            time = time[0]["datetime"]

            # extract the headline
            param_to_find = DATA_TO_EXTRACT["query_headline"]
            headline = block_new.find_all(param_to_find[0],
                                          attrs=param_to_find[1])
            headline = get_rid_off_key(headline[0].a.contents)

            # extract the body as a list of strings
            param_to_find = DATA_TO_EXTRACT["query_body"]
            body = block_new.find_all(param_to_find[0],
                                      attrs=param_to_find[1])
            print(body)

            body_string = get_rid_off_key(body[0].contents)
            extracted_from_block = {"time": time,
                                    "headline": headline,
                                    "body": body_string}
            # for debug :
            # print("\t".join(extracted_from_block))
            if len(body_string) >= 5:
                result.append(extracted_from_block)
    except Exception as inst:
        print("Something went wrong :)", inst)
        print("URL: ", url)
        result = []
    return result


def Query(key, max_page=5000):
    # Init page and keep looping until the search returns no more results
    page = 1
    all_result_query = []
    error = 0
    while page < max_page:
        print("Collected: %d articles" % len(all_result_query))
        new_url = parser_url(key, page)
        result = extract_from_url(new_url)
        # None means the results page was empty -> stop crawling
        if result is None:
            break
        if len(result) > 0 or error > 10:
            page += 1
            error = 0
        else:
            error += 1
        all_result_query += result
    return all_result_query


if __name__ == "__main__":
    print("Begin query information about: ", sys.argv[1])
    print("Then will save result in: ", sys.argv[2])

    News = Query(sys.argv[1], int(sys.argv[4]))
    file_name1 = sys.argv[2]

    with open(file_name1, "w") as W:
        json.dump(News, W, indent=1)

    file_name2 = sys.argv[3]
    with open(file_name2, "w") as W:
        W.write("\n".join([new["body"] for new in News]))
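For reference, parser_url only concatenates the fixed Bloomberg search parameters; for example, a query for "apple" gives the URLs below (note the doubled "sites=" produced by joining the literal prefix with the stored parameter value):

print(parser_url("apple", page=1))
# -> https://www.bloomberg.com/search?query=apple&sort=time:asc&sites=sites=bview&page=1
print(parser_url("apple", page=3, sort_by="sort_by_newest"))
# -> https://www.bloomberg.com/search?query=apple&sort=time:desc&sites=sites=bview&page=3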
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 17:52:11 2017

@author: red-sky
"""
import sys
import json
import numpy as np


def updateDict(words, dictUp):
    # update the word-count dictionary "dictUp" with the given "words"
    for w in words:
        if w in dictUp:
            dictUp[w] += 1
        else:
            dictUp[w] = 1  # count the first occurrence as well
    return dictUp


def extractVocab(eventsFile, fromIndex=0, toIndex="END"):
    # from the events file, count the words and create index mappings
    vocab = dict()
    with open(eventsFile, "r") as file:
        list_events = file.read().strip().splitlines()
        if toIndex == -1:
            list_events = list_events[fromIndex:]
        else:
            list_events = sorted(set(list_events[fromIndex:toIndex]))
        # skip leading malformed lines whose first field is empty
        for i, event in enumerate(list_events):
            if event[0] != "\t":
                index = i
                break
        list_events = list_events[index:]
    for event in list_events:
        event = event.split("\t")
        words = event[1].split(" ") + \
            event[2].split(" ") + \
            event[3].split(" ")
        vocab = updateDict(words, vocab)
    vocab_words = vocab.keys()
    support_words = ["NOISEWORDS"]
    vocab_words = support_words + \
        sorted(vocab_words, key=lambda x: vocab[x], reverse=True)
    IndexWords = range(len(vocab_words))
    # index 0 is the "NOISEWORDS" placeholder used for rare words
    Count = [0] + [vocab[w] for w in vocab_words[1:]]
    result = [dict(zip(vocab_words, Count)),
              dict(zip(IndexWords, vocab_words)),
              dict(zip(vocab_words, IndexWords))]
    return result, list_events


def convertEvent(eventsFile, vocabMapping, countMin=5):
    # convert all events to word indices for training
    wordCount, _, word2index = vocabMapping
    Events = []
    with open(eventsFile, "r") as file:
        list_events = file.read().strip().splitlines()

    for event in list_events:
        event = event.split("\t")
        list_obj = [event[1].split(" "),
                    event[2].split(" "),
                    event[3].split(" ")]

        # Convert only words that appear at least countMin times;
        # everything else is mapped to index 0 ("NOISEWORDS")
        wordsIndexed = []
        for obj in list_obj:
            objIndex = []
            for w in obj:
                if wordCount[w] >= countMin:
                    objIndex.append(word2index[w])
                else:
                    objIndex.append(0)
            wordsIndexed.append(objIndex)
        Events.append(wordsIndexed)
    return Events


if __name__ == "__main__":
    # in
    EventPath = "../../Thesis_data/Apple_query_result_body.txt"
    fromIndex = 0
    toIndex = -1
    minCountWord = 5
    # out
    EventNewPath = "./Events_for_training.txt"
    VocabPath = "./Vocab_in_events_for_training.json"
    IndexdEventPath = "./IndexedEvents_for_training.npy"

    vocabMapping, EventNew = extractVocab(EventPath, fromIndex, toIndex)
    with open(VocabPath, "w") as W:
        json.dump(vocabMapping, W, indent=2)

    with open(EventNewPath, "w") as W:
        W.write("\n".join(EventNew))

    indexed_events = convertEvent(EventNewPath, vocabMapping, minCountWord)
    np.save(arr=np.array(indexed_events), file=IndexdEventPath)
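For clarity, extractVocab returns three mappings (word to count, index to word, word to index), with index 0 reserved for the "NOISEWORDS" placeholder, and convertEvent replaces words rarer than countMin by that placeholder. A hypothetical usage sketch (toy file name and field layout assumed):

# Toy events file, tab-separated; fields 1-3 are assumed to hold the
# actor / action / object phrases used by the functions above.
vocabMapping, kept_events = extractVocab("toy_events.txt", fromIndex=0, toIndex=-1)
wordCount, index2word, word2index = vocabMapping

print(index2word[0])            # "NOISEWORDS" -> placeholder for rare words
print(word2index["apple"])      # index of a frequent word (if present in the file)

indexed = convertEvent("toy_events.txt", vocabMapping, countMin=5)
print(indexed[0])               # [[actor word indices], [action ...], [object ...]]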
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 11:58:54 2017

@author: red-sky
"""

import sys
import json


def findDate(news_body, list_news):
    # return the publication time of the first news item whose body
    # contains the given event sentence
    date = ""
    for ind, new in enumerate(list_news):
        if news_body in new["body"]:
            date = new["time"]
            break
    return date


def extractAllDate(list_events, list_news, choosedInfor=[1, 2, 3, 0, 6]):
    # event[6] holds the sentence text that is matched against the news body;
    # choosedInfor lists the event fields kept in the output
    list_result = []
    N = len(list_events)
    i = 0.0
    for event in list_events:
        i += 1
        if i % 1000 == 0:
            print("Done %.1f percent" % (i/N*100))
        date = [findDate(event[6], list_news)]
        infor = date + [event[k] for k in choosedInfor]
        list_result.append(infor)
    return list_result


if __name__ == "__main__":
    events = open(sys.argv[1], "r").read().strip().splitlines()
    events = [event.split("\t") for event in events
              if len(event.split("\t")) > 5]
    news = json.load(open(sys.argv[2], "r"))
    result = extractAllDate(events, news)

    with open(sys.argv[3], "w") as W:
        for line in result[1:]:
            W.write("\t".join(line)+"\n")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 13 16:57:11 2017

@author: red-sky
"""
import sys
import numpy as np
import pickle
import pandas as pd


def main(VectorsPath, EventPath, StockPricePath, days):

    # load the learned event vectors and the dated events
    with open(VectorsPath, "rb") as H:
        Vec = pickle.load(H)
        Vectors = np.array([list(b[0]) for a, b in Vec.values()])
#    Vectors = np.load(VectorsPath)
    with open(EventPath, "r") as H:
        F = np.array([a.split("\t")[0:4] for a in H.read().splitlines()])

    # group the event vectors by calendar day (first 10 chars of the timestamp)
    D = {}
    for date, vec in zip(F[:, 0], Vectors):
        if date[:10] in D:
            D[date[:10]].append(vec)
        else:
            D[date[:10]] = [vec]

    # average all event vectors of the same day
    D2 = {}
    for date in sorted(D.keys()):
        D2[date] = np.mean(D[date], 0)

    Dates = np.array(sorted(D2.keys()))
    # sliding windows of `days` consecutive dates (the original hardcoded 5 here,
    # which only matches the intent when days == 5)
    SampleIndex = [list(range(i-days, i)) for i in range(days, len(Dates))]
    DataX = []
    DateX = []
    for listIndex in SampleIndex:
        DataX.append([D2[date] for date in Dates[listIndex]])
        DateX.append(Dates[listIndex[-1]])

    # label each window with the open-to-open return of the stock on that day
    Df = pd.read_csv(StockPricePath)
    LabelY = []
    DataX_yesData = []
    for i, date in enumerate(DateX):
        retu = list(Df.loc[Df["Date"] == date]["ReturnOpen"])
        print(retu)
        if len(retu) > 0:
            retu = float(retu[0])*100
            if retu > 0:
                LabelY.append([1, 0])   # up
            else:
                LabelY.append([0, 1])   # down or flat
            DataX_yesData.append(list(DataX[i]))
            print(date)

    dataX = np.array(DataX_yesData)
    dataY = np.array(LabelY)
    print("DataX:", dataX.shape)
    print("DataY:", dataY.shape, np.sum(dataY, 0) / np.sum(dataY))
    return (dataX, dataY)


if __name__ == "__main__":
    VectorsPath = sys.argv[1]
    EventPath = sys.argv[2]
    StockPricePath = sys.argv[3]
    days = int(sys.argv[5])
    DataX, LabelY = main(VectorsPath, EventPath, StockPricePath, days)
    DataPath = sys.argv[4]
    np.save(arr=DataX, file=DataPath+"/DailyVector" + sys.argv[5] + ".npy")
    np.save(arr=LabelY, file=DataPath+"/DailyReturn" + sys.argv[5] + ".npy")
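Taken together, these scripts form the data pipeline of the project: crawl Bloomberg news, attach dates to the extracted events, average their learned embeddings per trading day, and train the Keras classifiers. A hypothetical end-to-end driver is sketched below; every script and intermediate file name is a placeholder, since the actual file names are not visible in this diff, and the event-embedding training step that produces the event-vector pickle is among the collapsed files above.

# Sketch of the pipeline order; all script / file names below are placeholders.
import subprocess

steps = [
    # 1. Crawl Bloomberg search results (query, JSON out, raw-body out, max pages).
    ["python", "crawl_bloomberg.py", "apple", "apple_news.json",
     "apple_news_body.txt", "500"],
    # 2. Attach publication dates to extracted events (events TSV + news JSON).
    ["python", "find_event_dates.py", "events.txt", "apple_news.json",
     "dated_events.txt"],
    # 3. Average the learned event vectors per day and align them with returns
    #    (the event-vector pickle comes from the collapsed embedding-training code).
    ["python", "build_daily_vectors.py", "event_vectors.pkl", "dated_events.txt",
     "apple_combined_data.csv", "Data", "5"],
    # 4. Train a classifier on 5-day windows of 80-dim daily event vectors.
    ["python", "train_lstm.py", "Data/DailyVector5.npy", "Data/DailyReturn5.npy",
     "ModelStock_result/Proposed_Model/", "200", "80", "5"],
]
for cmd in steps:
    subprocess.run(cmd, check=True)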