구현코드, 모델(.h5 file) 및 데이터셋 업로드

Jihoon
Commit baf6f641ef53f60a4a15ddc7acea1d0ac11cd8e5 baf6f641 1 parent bce0761e
Showing 3 changed files with 527 additions and 0 deletions
Model/Dataset/dataset.zip
Model/LSTM_model.ipynb
Model/lstm_model.h5
--- a/Model/Dataset/dataset.zip 0 → 100644
View file @baf6f64
+++ b/Model/Dataset/dataset.zip 0 → 100644
View file @baf6f64
--- a/Model/LSTM_model.ipynb 0 → 100644
View file @baf6f64
+++ b/Model/LSTM_model.ipynb 0 → 100644
View file @baf6f64
+ {
+   "nbformat": 4,
+   "nbformat_minor": 0,
+   "metadata": {
+     "colab": {
+       "name": "LSTM_7.ipynb",
+       "provenance": [],
+       "collapsed_sections": []
+     },
+     "kernelspec": {
+       "display_name": "Python 3",
+       "name": "python3"
+     },
+     "accelerator": "GPU"
+   },
+   "cells": [
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "aRHde3RC83kB"
+       },
+       "source": [
+         "import pandas as pd\n",
+         "import numpy as np\n",
+         "from sklearn.model_selection import train_test_split\n",
+         "from sklearn import metrics\n",
+         "from keras.wrappers.scikit_learn import KerasClassifier\n",
+         "from tensorflow.keras.models import Sequential\n",
+         "from tensorflow.keras.layers import Dense, Activation\n",
+         "from tensorflow.keras.callbacks import EarlyStopping\n",
+         "import tensorflow.feature_column as feature_column\n",
+         "from keras.utils.vis_utils import plot_model\n",
+         "from tensorflow.keras import layers\n",
+         "from sklearn.externals.six import StringIO  \n",
+         "from sklearn.metrics import confusion_matrix\n",
+         "from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
+         "from sklearn.metrics import roc_auc_score, roc_curve\n",
+         "from sklearn.metrics import plot_confusion_matrix\n",
+         "from warnings import simplefilter\n",
+         "import time\n",
+         "from ast import literal_eval\n",
+         "\n",
+         "# Sequence of dataset generating\n",
+         "# malaria -> flood -> normal -> bruteforce -> malformed\n",
+         "f_class0 = '/content/drive/MyDrive/Datasets/MQTTset/malaria.csv' # malariaDoS\n",
+         "f_class1 = '/content/drive/MyDrive/Datasets/MQTTset/flood.csv' # flood\n",
+         "f_class2 = '/content/drive/MyDrive/Datasets/MQTTset/legitimate1.csv' # normal\n",
+         "f_class3 = '/content/drive/MyDrive/Datasets/MQTTset/bruteforce.csv' # bruteforce\n",
+         "f_class4 = '/content/drive/MyDrive/Datasets/MQTTset/malformed.csv' # malformed\n",
+         "\n",
+         "# 클래스 분류번호\n",
+         "# 0: malariaDoS\n",
+         "# 1: flood\n",
+         "# 2: normal\n",
+         "# 3: bruteforce\n",
+         "# 4: malformed\n",
+         "\n",
+         "fileList = [f_class0,f_class1,f_class2,f_class3,f_class4]\n",
+         "targetList = [0,1,2,3,4] \n",
+         "\n",
+         "pd.set_option('display.max_columns', 33)                       # 출력할 열의 최대개수\n",
+         "\n",
+         "# feature 리스트\n",
+         "allfeatures = ['frame.time_delta', 'frame.time_delta_displayed', 'frame.time_epoch', 'frame.time_invalid', 'frame.time_relative', 'eth.src', 'eth.dst', 'ip.src', 'ip.dst', 'tcp.srcport', 'tcp.dstport', 'tcp.flags', 'frame.cap_len', 'frame.len', 'frame.number', 'tcp.stream', 'tcp.analysis.initial_rtt', 'tcp.time_delta', 'tcp.len', 'tcp.window_size_value', 'tcp.checksum', 'mqtt.clientid', 'mqtt.clientid_len', 'mqtt.conack.flags', 'mqtt.conack.flags.reserved', 'mqtt.conack.flags.sp', 'mqtt.conack.val', 'mqtt.conflag.cleansess', 'mqtt.conflag.passwd', 'mqtt.conflag.qos', 'mqtt.conflag.reserved', 'mqtt.conflag.retain', 'mqtt.conflag.uname', 'mqtt.conflag.willflag', 'mqtt.conflags', 'mqtt.dupflag', 'mqtt.hdrflags', 'mqtt.kalive', 'mqtt.len', 'mqtt.msg', 'mqtt.msgid', 'mqtt.msgtype', 'mqtt.passwd', 'mqtt.passwd_len', 'mqtt.proto_len', 'mqtt.protoname', 'mqtt.qos', 'mqtt.retain', 'mqtt.sub.qos', 'mqtt.suback.qos', 'mqtt.topic', 'mqtt.topic_len', 'mqtt.username', 'mqtt.username_len', 'mqtt.ver', 'mqtt.willmsg', 'mqtt.willmsg_len', 'mqtt.willtopic', 'mqtt.willtopic_len', 'ip.proto']\n",
+         "usedfeatures = ['tcp.flags', 'tcp.time_delta', 'tcp.len', 'mqtt.conack.flags', 'mqtt.conack.flags.reserved', 'mqtt.conack.flags.sp', 'mqtt.conack.val', 'mqtt.conflag.cleansess', 'mqtt.conflag.passwd', 'mqtt.conflag.qos', 'mqtt.conflag.reserved', 'mqtt.conflag.retain', 'mqtt.conflag.uname', 'mqtt.conflag.willflag', 'mqtt.conflags', 'mqtt.dupflag', 'mqtt.hdrflags', 'mqtt.kalive', 'mqtt.len', 'mqtt.msgid', 'mqtt.msgtype', 'mqtt.proto_len', 'mqtt.protoname', 'mqtt.qos', 'mqtt.retain', 'mqtt.sub.qos', 'mqtt.suback.qos', 'mqtt.ver', 'mqtt.willmsg', 'mqtt.willmsg_len', 'mqtt.willtopic', 'mqtt.willtopic_len']\n",
+         "droppedfeatures = ['ip.src', 'ip.dst', 'frame.time_relative', 'frame.time_invalid', 'mqtt.username_len', 'frame.len', 'tcp.dstport', 'tcp.window_size_value', 'mqtt.username', 'mqtt.passwd_len', 'mqtt.topic', 'mqtt.topic_len', 'tcp.checksum', 'frame.cap_len', 'mqtt.passwd', 'frame.time_delta', 'eth.dst', 'mqtt.clientid', 'frame.time_epoch', 'frame.number', 'eth.src', 'mqtt.clientid_len', 'mqtt.msg', 'tcp.stream', 'frame.time_delta_displayed', 'tcp.analysis.initial_rtt', 'tcp.srcport', 'ip.proto']\n",
+         "\n",
+         "print(\"전체 feature 개수: \", len(allfeatures))\n",
+         "print(\"사용된 feature 개수: \", len(usedfeatures))\n",
+         "print(\"제외한 feature 개수: \", len(droppedfeatures))\n",
+         "\n",
+         "\n",
+         "def split_dataset(dataframe, test_size): # 훈련/테스트/검증 데이터셋으로 분할\n",
+         "    train, test = train_test_split(dataframe, test_size=test_size, shuffle=False)\n",
+         "    train, val = train_test_split(train, test_size=0.3, shuffle=False)\n",
+         "    print(len(train), '훈련 샘플')\n",
+         "    print(len(val), '검증 샘플')\n",
+         "    print(len(test), '테스트 샘플')\n",
+         "    return train, test, val\n",
+         "\n",
+         "\n",
+         "def modify_features(fileList, dropfeaturelist): # 데이터프레임에서 제외된 feature 열들을 drop, target label 추가\n",
+         "    _dfList = []\n",
+         "    for i in range(len(fileList)):\n",
+         "        df = pd.read_csv(fileList[i], encoding='euc-kr')\n",
+         "        for feature in dropfeaturelist:\n",
+         "            df = df.drop([feature],axis=1)\n",
+         "        df['target'] = i # labeling\n",
+         "        _dfList.append(df)\n",
+         "    return _dfList\n",
+         "\n",
+         "\n",
+         "dfList = modify_features(fileList, droppedfeatures)\n",
+         "xTrainList,yTrainList = [],[] # train 데이터\n",
+         "xTestList,yTestList = [],[] # test 데이터\n",
+         "xValList,yValList = [],[] # validation 데이터\n",
+         "\n",
+         "\n",
+         "for df in dfList:\n",
+         "    df['mqtt.protoname'].fillna('No', inplace=True)\n",
+         "    df['mqtt.protoname'].replace('MQTT', 1, inplace=True)\n",
+         "    df['mqtt.protoname'].replace('No', 0, inplace=True)\n",
+         "    train, test, val = split_dataset(df, 0.3)\n",
+         "    trainLabel = train.pop('target')\n",
+         "    testLabel = test.pop('target')\n",
+         "    valLabel = val.pop('target')\n",
+         "    xTrainList.append(train)\n",
+         "    xTestList.append(test)\n",
+         "    xValList.append(val)\n",
+         "\n",
+         "    yTrainList.append(trainLabel)\n",
+         "    yTestList.append(testLabel)\n",
+         "    yValList.append(valLabel)\n",
+         "    print(train.head(2))\n",
+         "\n",
+         "# trainList,testList,valList 각각 해당 리스트 원소끼리 merge\n",
+         "\n",
+         "\n",
+         "for i in range(len(dfList)): # 길이 5의 df 리스트\n",
+         "    xTrain_df = pd.concat(xTrainList, ignore_index=True)\n",
+         "    yTrain_df = pd.concat(yTrainList, ignore_index=True)\n",
+         "\n",
+         "    xTest_df = pd.concat(xTestList, ignore_index=True)\n",
+         "    yTest_df = pd.concat(yTestList, ignore_index=True)\n",
+         "\n",
+         "    xVal_df = pd.concat(xValList, ignore_index=True)\n",
+         "    yVal_df = pd.concat(yValList, ignore_index=True)\n",
+         "\n",
+         "\n",
+         "print(\"훈련데이터셋 형상\\n\", xTrain_df.shape)\n",
+         "print(yTrain_df.shape)\n",
+         "print(\"테스트데이터셋 형상\\n\", xTest_df.shape)\n",
+         "print(yTest_df.shape)\n",
+         "print(\"검증데이터셋 형상\\n\", xVal_df.shape)\n",
+         "print(yVal_df.shape)\n",
+         "\n",
+         "\n"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "gnzbus9Nsr-F",
+         "colab": {
+           "base_uri": "https://localhost:8080/"
+         },
+         "outputId": "d9343f9c-fe78-4109-bf13-8c91165332c2"
+       },
+       "source": [
+         "\n",
+         "# |           16진수          |           문자열            |\n",
+         "\n",
+         "  # tcp.flags object               # mqtt.protoname object\n",
+         "\n",
+         "\n",
+         "  # mqtt.conack.flags            \n",
+         "  \n",
+         "  # mqtt.conflags\n",
+         "\n",
+         "  # mqtt.hdrflags\n",
+         "\n",
+         "hexfeatures = ['tcp.flags','mqtt.conack.flags','mqtt.conflags','mqtt.hdrflags']\n",
+         "\n",
+         "# 문자열 feature인 column들을 embedding\n",
+         "\n",
+         "\n",
+         "\n",
+         "trainList = [xTrain_df, xTest_df, xVal_df]\n",
+         "labelList = [yTrain_df, yTest_df, yVal_df]\n",
+         "\n",
+         "\n",
+         "for hexa in hexfeatures:\n",
+         "    xTrain_df[hexa].fillna('0000', inplace=True)\n",
+         "    for i in range(xTrain_df.shape[0]):\n",
+         "        if (type(xTrain_df[hexa][i]) == str) and ('x' in xTrain_df[hexa][i]):\n",
+         "            xTrain_df[hexa][i] = int(xTrain_df[hexa][i],16)\n",
+         "            #print(type(xTrain_df[hexa][i]))\n",
+         "        else:\n",
+         "            xTrain_df[hexa][i] = int(xTrain_df[hexa][i])\n",
+         "\n"
+       ],
+       "execution_count": null,
+       "outputs": [
+         {
+           "output_type": "stream",
+           "text": [
+             "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:26: SettingWithCopyWarning: \n",
+             "A value is trying to be set on a copy of a slice from a DataFrame\n",
+             "\n",
+             "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+             "/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:29: SettingWithCopyWarning: \n",
+             "A value is trying to be set on a copy of a slice from a DataFrame\n",
+             "\n",
+             "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"
+           ],
+           "name": "stderr"
+         }
+       ]
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "7Fg_qxJwA0m1"
+       },
+       "source": [
+         "\r\n",
+         "for hexa in hexfeatures:\r\n",
+         "    xTest_df[hexa].fillna('0000', inplace=True)\r\n",
+         "    for i in range(xTest_df.shape[0]):\r\n",
+         "        if (type(xTest_df[hexa][i]) == str) and ('x' in xTest_df[hexa][i]):\r\n",
+         "            xTest_df[hexa][i] = int(xTest_df[hexa][i],16)\r\n",
+         "            #print(type(xTest_df[hexa][i]))\r\n",
+         "        else:\r\n",
+         "            xTest_df[hexa][i] = int(xTest_df[hexa][i])\r\n",
+         "\r\n",
+         "for hexa in hexfeatures:\r\n",
+         "    xVal_df[hexa].fillna('0000', inplace=True)\r\n",
+         "    for i in range(xVal_df.shape[0]):\r\n",
+         "        if (type(xVal_df[hexa][i]) == str) and ('x' in xVal_df[hexa][i]):\r\n",
+         "            xVal_df[hexa][i] = int(xVal_df[hexa][i],16)\r\n",
+         "            #print(type(xVal_df[hexa][i]))\r\n",
+         "        else:\r\n",
+         "            xVal_df[hexa][i] = int(xVal_df[hexa][i])\r\n",
+         "\r\n",
+         "print(xTrain_df.shape)\r\n",
+         "print(xTest_df.shape)\r\n",
+         "print(xVal_df.shape)"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "imSEh-WcTvz-"
+       },
+       "source": [
+         "xTrain_df.fillna(0, inplace=True)\r\n",
+         "xTest_df.fillna(0, inplace=True)\r\n",
+         "xVal_df.fillna(0, inplace=True)\r\n",
+         "print(xTrain_df.head(40))"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "lAjG_YjoSSQU"
+       },
+       "source": [
+         "# 임시 파일저장\r\n",
+         "xTrain_df.to_csv(\"/content/drive/MyDrive/Datasets/MQTTset/xTrain.csv\", mode='w', index=False, header=True, encoding='utf-8')\r\n",
+         "xTest_df.to_csv(\"/content/drive/MyDrive/Datasets/MQTTset/xTest.csv\", mode='w', index=False, header=True, encoding='utf-8')\r\n",
+         "xVal_df.to_csv(\"/content/drive/MyDrive/Datasets/MQTTset/xVal.csv\", mode='w', index=False, header=True, encoding='utf-8')"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "SpMsliPqVUjK"
+       },
+       "source": [
+         "xTrain_df = pd.read_csv(\"/content/drive/MyDrive/Datasets/MQTTset/xTrain.csv\")\r\n",
+         "xTest_df = pd.read_csv(\"/content/drive/MyDrive/Datasets/MQTTset/xTest.csv\")\r\n",
+         "xVal_df = pd.read_csv(\"/content/drive/MyDrive/Datasets/MQTTset/xVal.csv\")\r\n",
+         "print(xTrain_df.shape)\r\n",
+         "print(xTest_df.shape)\r\n",
+         "print(xVal_df.shape)"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "OqmBTzoYB9zP"
+       },
+       "source": [
+         "import pickle\r\n",
+         "import os\r\n",
+         "import sys"
+       ],
+       "execution_count": 4,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "NgTPWw4vaJFU"
+       },
+       "source": [
+         "from sklearn.preprocessing import StandardScaler\r\n",
+         "# 정규화, sequence화\r\n",
+         "scaler = StandardScaler()\r\n",
+         "xTrain_ = scaler.fit_transform(xTrain_df)\r\n",
+         "xTest_ = scaler.transform(xTest_df)  # reuse the mean/std fitted on the training split\r\n",
+         "xVal_ = scaler.transform(xVal_df)\r\n",
+         "print(xTrain_.shape)\r\n",
+         "print(xTest_.shape)\r\n",
+         "print(xVal_.shape)\r\n",
+         "\r\n",
+         "def ds_to_windows(dataset, timestep) : # dataset을 원하는 window 길이(timestep)로 나누어 저장 \r\n",
+         "    #winset = np.array([])\r\n",
+         "    winset = []\r\n",
+         "    if timestep == 1 : # 사실상 window로 나누는 것이 아니라 3차원으로만 만들어줌\r\n",
+         "        for i in range((len(dataset))) :\r\n",
+         "            win = list(dataset[i]) # 길이가 32인 1개 행(1, 32) [[feature1,feature2,...,feature32]]\r\n",
+         "            winset.append(win)\r\n",
+         "    else:\r\n",
+         "        for i in range(dataset.shape[0]-timestep+1):\r\n",
+         "            win = list(dataset[i:(i+timestep)]) # win 자체가 이미 2차원 (timestep*feature)\r\n",
+         "            winset.append(win)\r\n",
+         "            if i % 50 == 0:\r\n",
+         "                print(i, \"/\", dataset.shape[0]-timestep+1)\r\n",
+         "    np_winset = np.asarray(winset)\r\n",
+         "    return np_winset\r\n",
+         "\r\n",
+         "# tmp = xTrain_[:10]\r\n",
+         "\r\n",
+         "# result = ds_to_windows(tmp,3)\r\n",
+         "# resultnp = np.asarray(result)\r\n",
+         "# print(resultnp.shape)\r\n",
+         "\r\n",
+         "# numpy array 형태\r\n",
+         "# print(xTrainArr.shape)\r\n",
+         "# print(xTestArr.shape)\r\n",
+         "# print(xValArr.shape)\r\n",
+         "\r\n",
+         "xTrain = ds_to_windows(xTrain_,10)\r\n",
+         "print(xTrain.shape)\r\n",
+         "with open(\"xTrain.pickle\",\"wb\") as fw:\r\n",
+         "    pickle.dump(xTrain, fw)\r\n",
+         "xTest = ds_to_windows(xTest_,10)\r\n",
+         "print(xTest.shape)\r\n",
+         "with open(\"xTest.pickle\",\"wb\") as fw:\r\n",
+         "    pickle.dump(xTest, fw)\r\n",
+         "xVal = ds_to_windows(xVal_,10)\r\n",
+         "print(xVal.shape)\r\n",
+         "with open(\"xVal.pickle\",\"wb\") as fw:\r\n",
+         "    pickle.dump(xVal, fw)\r\n",
+         "\r\n",
+         "\r\n"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "Xb16z5AGwjwZ"
+       },
+       "source": [
+         "# Label도 seq화\r\n",
+         "yTrain_ = np.asarray(yTrain_df)\r\n",
+         "yTest_ = np.asarray(yTest_df)\r\n",
+         "yVal_ = np.asarray(yVal_df)\r\n",
+         "\r\n",
+         "yTrainTmp = ds_to_windows(yTrain_,10)\r\n",
+         "yTrain = []\r\n",
+         "\r\n",
+         "for i in range(yTrainTmp.shape[0]):\r\n",
+         "    ohv = np.array([0,0,0,0,0])\r\n",
+         "    ohv[(np.bincount(yTrainTmp[i]).argmax())] += 1\r\n",
+         "    yTrain.append(ohv)\r\n",
+         "yTrain = np.asarray(yTrain)\r\n",
+         "\r\n",
+         "with open(\"yTrain.pickle\",\"wb\") as fw:\r\n",
+         "    pickle.dump(yTrain, fw)\r\n",
+         "\r\n",
+         "yTestTmp = ds_to_windows(yTest_,10)\r\n",
+         "yTest = []\r\n",
+         "for i in range(yTestTmp.shape[0]):\r\n",
+         "    ohv = np.array([0,0,0,0,0])\r\n",
+         "    ohv[(np.bincount(yTestTmp[i]).argmax())] += 1\r\n",
+         "    yTest.append(ohv)\r\n",
+         "yTest = np.asarray(yTest)\r\n",
+         "\r\n",
+         "with open(\"yTest.pickle\",\"wb\") as fw:\r\n",
+         "    pickle.dump(yTest, fw)\r\n",
+         "\r\n",
+         "yValTmp = ds_to_windows(yVal_,10)\r\n",
+         "yVal = []\r\n",
+         "for i in range(yValTmp.shape[0]):\r\n",
+         "    ohv = np.array([0,0,0,0,0])\r\n",
+         "    ohv[(np.bincount(yValTmp[i]).argmax())] += 1\r\n",
+         "    yVal.append(ohv)\r\n",
+         "yVal = np.asarray(yVal)\r\n",
+         "\r\n",
+         "with open(\"yVal.pickle\",\"wb\") as fw:\r\n",
+         "    pickle.dump(yVal, fw)\r\n",
+         "\r\n",
+         "print(yTrain.shape)\r\n",
+         "print(yTest.shape)\r\n",
+         "print(yVal.shape)"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "yGpaTlxbX-fa"
+       },
+       "source": [
+         "print(\"xTrain: \", xTrain.shape)\r\n",
+         "print(\"xTest: \", xTest.shape)\r\n",
+         "print(\"xVal: \", xVal.shape)\r\n",
+         "print(\"yTrain: \", yTrain.shape)\r\n",
+         "print(\"yTest: \", yTest.shape)\r\n",
+         "print(\"yVal: \", yVal.shape)"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "5FcsBC6iGDrt"
+       },
+       "source": [
+         "# Model\r\n",
+         "from tensorflow.keras import Sequential\r\n",
+         "from keras.layers import LSTM, Dense, Activation, Input\r\n",
+         "from keras.models import Model\r\n",
+         "from keras.optimizers import Adam\r\n",
+         "from keras.callbacks import ModelCheckpoint, EarlyStopping\r\n",
+         "\r\n",
+         "\r\n",
+         "model = Sequential()\r\n",
+         "model.add(LSTM(128, dropout=0.2, return_sequences=False, input_shape=(10, 32)))\r\n",
+         "model.add(Dense(128, activation='relu'))\r\n",
+         "model.add(Dense(5, activation='softmax'))\r\n",
+         "model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\r\n",
+         "model.summary()\r\n",
+         "\r\n",
+         "\r\n",
+         "# input = Input(shape=(10, 32))\r\n",
+         "# x = LSTM(128, return_sequences=True)(input)\r\n",
+         "# x = LSTM(128, return_sequences=True)(x)\r\n",
+         "# x = LSTM(128)(x)\r\n",
+         "# x = Dense(5, activation='softmax')(x)\r\n",
+         "# model = Model(input, x)\r\n",
+         "# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])\r\n",
+         "# model.summary()\r\n",
+         "\r\n",
+         "filename = 'checkpoint-epoch-trial-001.h5'\r\n",
+         "checkpoint = ModelCheckpoint(filename,             # file명을 지정합니다\r\n",
+         "                             monitor='val_loss',   # val_loss 값이 개선되었을때 호출됩니다\r\n",
+         "                             verbose=0,            # 0 = silent; set to 1 to log each checkpoint save\r\n",
+         "                             save_best_only=True,  # 가장 best 값만 저장합니다\r\n",
+         "                             mode='auto'           # auto는 알아서 best를 찾습니다. min/max\r\n",
+         "                            )\r\n",
+         "\r\n",
+         "earlystopping = EarlyStopping(monitor='val_loss',  # metric to watch (validation loss)\r\n",
+         "                              patience=15,         # stop after 15 epochs without improvement\r\n",
+         "                             )\r\n",
+         "\r\n",
+         "hist = model.fit(xTrain, yTrain, \r\n",
+         "                 validation_data=(xVal, yVal),\r\n",
+         "                 epochs=50,\r\n",
+         "                 callbacks=[checkpoint, earlystopping], # checkpoint, earlystopping 콜백\r\n",
+         "                )\r\n",
+         "\r\n",
+         "\r\n",
+         "\r\n",
+         "# ## Load pickle\r\n",
+         "# with open(\"data.pickle\",\"rb\") as fr:\r\n",
+         "#     data = pickle.load(fr)\r\n",
+         "# print(data)\r\n",
+         "# #['a', 'b', 'c']"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "YPVG1OUZ5voT"
+       },
+       "source": [
+         "\r\n",
+         "# 학습과정 시각화\r\n",
+         "import matplotlib.pyplot as plt\r\n",
+         "\r\n",
+         "fig, loss_ax = plt.subplots()\r\n",
+         "\r\n",
+         "acc_ax = loss_ax.twinx()\r\n",
+         "\r\n",
+         "loss_ax.set_ylim([0.0, 0.03])\r\n",
+         "acc_ax.set_ylim([0.99, 1.0])\r\n",
+         "\r\n",
+         "loss_ax.plot(hist.history['loss'], 'y', label='train_loss')\r\n",
+         "acc_ax.plot(hist.history['accuracy'], 'b', label='train_accuracy')\r\n",
+         "\r\n",
+         "loss_ax.set_xlabel('epoch')\r\n",
+         "loss_ax.set_ylabel('loss')\r\n",
+         "acc_ax.set_ylabel('accuracy')\r\n",
+         "\r\n",
+         "loss_ax.legend(loc='upper left')\r\n",
+         "acc_ax.legend(loc='lower left')\r\n",
+         "\r\n",
+         "plt.show()"
+       ],
+       "execution_count": null,
+       "outputs": []
+     },
+     {
+       "cell_type": "code",
+       "metadata": {
+         "id": "U78Q9bvBXlFi"
+       },
+       "source": [
+         "# # 성능평가\r\n",
+         "loss_and_metrics = model.evaluate(xTest, yTest, batch_size=50)\r\n",
+         "\r\n",
+         "print('loss_and_metrics : ' + str(loss_and_metrics))\r\n"
+       ],
+       "execution_count": null,
+       "outputs": []
+     }
+   ]
+ }
\ No newline at end of file
--- a/Model/lstm_model.h5 0 → 100644
View file @baf6f64
+++ b/Model/lstm_model.h5 0 → 100644
View file @baf6f64