전체 인덱싱+뮤지컬.ipynb 171 KB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5699\n",
      "9823\n",
      "14020\n",
      "2727\n",
      "1498\n",
      "1464\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.tokenize import word_tokenize\n",
    "import re\n",
    "\n",
    "\"\"\"\n",
    "thriller_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/thrillerPlot.csv')\n",
    "drama_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/dramaPlot.csv')\n",
    "fantasy_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/fantasyPlot.csv')\n",
    "history_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/historyPlot.csv')\n",
    "social_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/socialPlot.csv')\n",
    "romance_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/romancePlot.csv')\n",
    "musical_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/musicalData/broadMusicalPlot.csv',encoding='cp949')\n",
    "\n",
    "# /Users/김서영/Desktop/datacap/data/moviedata/moviePlot/romancePlot.csv\n",
    "\"\"\"\n",
    "romance_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/romancePlot.csv')\n",
    "thriller_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/thrillerPlot.csv')\n",
    "drama_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/dramaPlot.csv')\n",
    "fantasy_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/fantasyPlot.csv')\n",
    "history_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/historyPlot.csv')\n",
    "social_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/socialPlot.csv')\n",
    "\n",
    "musical_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/musicalData/broadMusicalPlot.csv',encoding='cp949')\n",
    "\n",
    "\n",
    "print(len(romance_plot)) #5699 ==> train 2500 test 2500\n",
    "print(len(thriller_plot)) #9823 ==> train 2500 test 2500\n",
    "print(len(drama_plot))\n",
    "print(len(fantasy_plot))\n",
    "print(len(history_plot))\n",
    "print(len(social_plot))\n",
    "\n",
    "train_data_size = 1463\n",
    "test_data_size = 1463\n",
    "\n",
    "#전처리(1) 전부 소문자로 변환\n",
    "\n",
    "\n",
    "#romance_plot.줄거리 = romance_plot.줄거리.str.lower()\n",
    "#thriller_plot.줄거리 = thriller_plot.줄거리.str.lower()\n",
    "\n",
    "#전처리(1-1) 데이터 csv 파일로 옮기기\n",
    "#romance_plot 2899개 train_data로 to_csv || 2800개 test_data로 to_csv\n",
    "#thriller_plot 2899개 train_data로 to_csv || 2800개 test_data로 to_csv\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "RM = [[] for _ in range(5699)]\n",
    "for i in range(5699):\n",
    "    RM[i].append(''.join(romance_plot.줄거리[i]))\n",
    "    \n",
    "TH = [[] for _ in range(9823)]\n",
    "for i in range(9823):\n",
    "    TH[i].append(''.join(thriller_plot.줄거리[i]))\n",
    "\n",
    "FN = [[] for _ in range(2727)]\n",
    "for i in range(2727):\n",
    "    FN[i].append(''.join(fantasy_plot.줄거리[i]))\n",
    "    \n",
    "HS = [[] for _ in range(1498)]\n",
    "for i in range(1498):\n",
    "    HS[i].append(''.join(history_plot.줄거리[i]))\n",
    "    \n",
    "SC = [[] for _ in range(1464)]\n",
    "for i in range(1464):\n",
    "    SC[i].append(''.join(social_plot.줄거리[i]))\n",
    "\n",
    "DR = [[] for _ in range(14019)]\n",
    "for i in range(14019):\n",
    "    DR[i].append(''.join(drama_plot.줄거리[i]))\n",
    "    \n",
    "Mu = [[] for _ in range(307)]\n",
    "for i in range(307):\n",
    "    Mu[i].append(''.join(musical_plot.muplot[i]))\n",
    "   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "allplot = RM+TH+FN+HS+SC+DR #모든 드라마 줄거리"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35230\n",
      "307\n"
     ]
    }
   ],
   "source": [
    "print(len(allplot))\n",
    "print(len(musical_plot))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 모든 장르 줄거리 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████| 35230/35230 [02:16<00:00, 257.99it/s]\n"
     ]
    }
   ],
   "source": [
    "# 토큰화+전처리(3) 전체 불용어 처리\n",
    "# 전체 플롯\n",
    "from tqdm import tqdm\n",
    "all_vocab = {} \n",
    "all_sentences = []\n",
    "stop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(allplot):\n",
    "    all_sentences = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "    for word in all_sentences: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in stop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in all_vocab:\n",
    "                    all_vocab[word] = 0 \n",
    "                all_vocab[word] += 1\n",
    "    all_sentences.append(result) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_vocab_sorted = sorted(all_vocab.items(), key = lambda x:x[1], reverse = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#전처리(4) 인덱스 부여\n",
    "all_word_to_index = {}\n",
    "i=0\n",
    "for (word, frequency) in all_vocab_sorted :\n",
    "    if frequency > 1 : # 정제(Cleaning) 챕터에서 언급했듯이 빈도수가 적은 단어는 제외한다.\n",
    "        i=i+1\n",
    "        all_word_to_index[word] = i\n",
    "#print(all_word_to_index)\n",
    "\n",
    "vocab_size = 15000 #상위 15000개 단어만 사용\n",
    "words_frequency = [w for w,c in all_word_to_index.items() if c >= vocab_size + 1] # 인덱스가 15000 초과인 단어 제거\n",
    "for w in words_frequency:\n",
    "    del all_word_to_index[w] # 해당 단어에 대한 인덱스 정보를 삭제\n",
    "\n",
    "    \n",
    "all_word_to_index['OOV'] = len(all_word_to_index) + 1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 로맨스"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 5699/5699 [00:19<00:00, 288.55it/s]\n"
     ]
    }
   ],
   "source": [
    "# 토큰화+전처리(3) 불용어 처리\n",
    "# 로맨스 플롯\n",
    "\n",
    "vocab_r = {} \n",
    "RMsentences = []\n",
    "RMstop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(RM):\n",
    "    RMsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "\n",
    "    for word in RMsentence: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in RMstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in vocab_r:\n",
    "                    vocab_r[word] = 0 \n",
    "                vocab_r[word] += 1\n",
    "    RMsentences.append(result) \n",
    "\n",
    "R_encoded = []\n",
    "for s in RMsentences:\n",
    "    temp = []\n",
    "    for w in s:\n",
    "        try:\n",
    "            temp.append(all_word_to_index[w])\n",
    "        except KeyError:\n",
    "            temp.append(all_word_to_index['OOV'])\n",
    "    R_encoded.append(temp)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 스릴러"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 9823/9823 [00:44<00:00, 221.65it/s]\n"
     ]
    }
   ],
   "source": [
    "# 토큰화+전처리(3) 불용어 처리\n",
    "# 스릴러 플롯\n",
    "\n",
    "vocab_th = {} \n",
    "THsentences = []\n",
    "THstop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(TH):\n",
    " \n",
    "    THsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "\n",
    "    for word in THsentence: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in THstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in vocab_th:\n",
    "                    vocab_th[word] = 0 \n",
    "                vocab_th[word] += 1\n",
    "    THsentences.append(result) \n",
    "\n",
    "TH_encoded = []\n",
    "for s in THsentences:\n",
    "    temp = []\n",
    "    for w in s:\n",
    "        try:\n",
    "            temp.append(all_word_to_index[w])\n",
    "        except KeyError:\n",
    "            temp.append(all_word_to_index['OOV'])\n",
    "    TH_encoded.append(temp)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 역사"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 1498/1498 [00:05<00:00, 281.17it/s]\n"
     ]
    }
   ],
   "source": [
    "vocab_HS = {} \n",
    "HSsentences = []\n",
    "HSstop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(HS):\n",
    "    \n",
    "    HSsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "\n",
    "    for word in HSsentence: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in HSstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in vocab_HS:\n",
    "                    vocab_HS[word] = 0 \n",
    "                vocab_HS[word] += 1\n",
    "    HSsentences.append(result) \n",
    "\n",
    "HS_encoded = []\n",
    "for s in HSsentences:\n",
    "    temp = []\n",
    "    for w in s:\n",
    "        try:\n",
    "            temp.append(all_word_to_index[w])\n",
    "        except KeyError:\n",
    "            temp.append(all_word_to_index['OOV'])\n",
    "    HS_encoded.append(temp)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 드라마"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████| 14019/14019 [00:44<00:00, 316.67it/s]\n"
     ]
    }
   ],
   "source": [
    "vocab_DR = {} \n",
    "DRsentences = []\n",
    "DRstop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(DR):\n",
    "    \n",
    "    DRsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "\n",
    "    for word in DRsentence: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in DRstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in vocab_DR:\n",
    "                    vocab_DR[word] = 0 \n",
    "                vocab_DR[word] += 1\n",
    "    DRsentences.append(result) \n",
    "\n",
    "DR_encoded = []\n",
    "for s in DRsentences:\n",
    "    temp = []\n",
    "    for w in s:\n",
    "        try:\n",
    "            temp.append(all_word_to_index[w])\n",
    "        except KeyError:\n",
    "            temp.append(all_word_to_index['OOV'])\n",
    "    DR_encoded.append(temp)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 판타지"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 2727/2727 [00:12<00:00, 214.44it/s]\n"
     ]
    }
   ],
   "source": [
    "vocab_FN = {} \n",
    "FNsentences = []\n",
    "FNstop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(FN):\n",
    "    \n",
    "    FNsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "\n",
    "    for word in FNsentence: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in FNstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in vocab_FN:\n",
    "                    vocab_FN[word] = 0 \n",
    "                vocab_FN[word] += 1\n",
    "    FNsentences.append(result) \n",
    "\n",
    "FN_encoded = []\n",
    "for s in FNsentences:\n",
    "    temp = []\n",
    "    for w in s:\n",
    "        try:\n",
    "            temp.append(all_word_to_index[w])\n",
    "        except KeyError:\n",
    "            temp.append(all_word_to_index['OOV'])\n",
    "    FN_encoded.append(temp)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 사회"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 1464/1464 [00:05<00:00, 266.80it/s]\n"
     ]
    }
   ],
   "source": [
    "vocab_SC = {} \n",
    "SCsentences = []\n",
    "SCstop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(SC):\n",
    "    \n",
    "    SCsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "\n",
    "    for word in SCsentence: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in SCstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in vocab_SC:\n",
    "                    vocab_SC[word] = 0 \n",
    "                vocab_SC[word] += 1\n",
    "    SCsentences.append(result) \n",
    "\n",
    "SC_encoded = []\n",
    "for s in SCsentences:\n",
    "    temp = []\n",
    "    for w in s:\n",
    "        try:\n",
    "            temp.append(all_word_to_index[w])\n",
    "        except KeyError:\n",
    "            temp.append(all_word_to_index['OOV'])\n",
    "    SC_encoded.append(temp)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 뮤지컬"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 307/307 [00:00<00:00, 1256.35it/s]\n"
     ]
    }
   ],
   "source": [
    "# 토큰화+전처리(3) 전체 불용어 처리\n",
    "# 전체 플롯\n",
    "from tqdm import tqdm\n",
    "Mu_vocab = {} \n",
    "Mu_sentences = []\n",
    "Mu_stop_words = set(stopwords.words('english'))\n",
    "\n",
    "for i in tqdm(Mu):\n",
    "    Mu_sentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
    "    result = []\n",
    "    \n",
    "    for word in Mu_sentence: \n",
    "        word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
    "        if word not in Mu_stop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
    "            if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
    "                result.append(word)\n",
    "                if word not in Mu_vocab:\n",
    "                    Mu_vocab[word] = 0 \n",
    "                Mu_vocab[word] += 1\n",
    "\n",
    "    Mu_sentences.append(result) "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "print(Mu_sentences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "Mu_encoded = []\n",
    "for s in Mu_sentences:\n",
    "    temp = []\n",
    "    for w in s:\n",
    "        try:\n",
    "            temp.append(all_word_to_index[w])\n",
    "        except KeyError:\n",
    "            temp.append(all_word_to_index['OOV'])\n",
    "    Mu_encoded.append(temp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "print(Mu_encoded)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "#전처리 방법에는 NLTK의 FreqDist, 케라스(Keras) 토크나이저도 사용 가능."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "로맨스 플롯, 스릴러 따로 토큰화 해서 x train에 넣을지... 고민중\n",
    "\n",
    "이번주 : 전처리 완료, \n",
    "이번 달 목표 : 뮤지컬 장르 분류 << 다양한 모델 사용해보기.\n",
    "\n",
    "6월에 교차검증 및 장르 시각화 설계까지.\n",
    "\n",
    "다음주 :  2진분류(LSTM) 완료, RNN 분류기 만들어보기"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 학습데이터"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 영화 줄거리는 X_train에, 장르 정보는 y_train에 저장된다.\n",
    "# 테스트용 줄거리 X_test에, 테스트용 줄거리의 장르 정보는 y_test에 저장된다.\n",
    "#맞춰서 저장하기. (진행중)\n",
    "\n",
    "#X_train = train_sc_df.dropna().drop(‘trade_price_idx_value’, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = []\n",
    "Y_train = []  #0 : romance, 1 : thriller \n",
    "for i in range(train_data_size):\n",
    "    X_train.append(R_encoded[i])\n",
    "    Y_train.append([1,0])\n",
    "    X_train.append(TH_encoded[i])\n",
    "    Y_train.append([0,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "줄거리 최대 길이 :  1974\n",
      "줄거리 평균 길이 :  267.093984962406\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "len_result = [len(s) for s in X_train]\n",
    "print(\"줄거리 최대 길이 : \",max(len_result))\n",
    "print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
    "\n",
    "plt.subplot(1,2,1)\n",
    "plt.boxplot(len_result)\n",
    "plt.subplot(1,2,2)\n",
    "plt.hist(len_result, bins=50)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 테스트 데이터\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_test = []\n",
    "Y_test = []  #0 : romance, 1 : thriller \n",
    "M_test = []\n",
    "for i in range(test_data_size):\n",
    "    X_test.append(R_encoded[train_data_size+i])\n",
    "    Y_test.append([1,0])\n",
    "    X_test.append(TH_encoded[train_data_size+i])\n",
    "    Y_test.append([0,1])\n",
    "M_test=Mu_encoded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "줄거리 최대 길이 :  1749\n",
      "줄거리 평균 길이 :  197.71394395078605\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "len_result = [len(s) for s in X_test]\n",
    "print(\"줄거리 최대 길이 : \",max(len_result))\n",
    "print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
    "\n",
    "plt.subplot(1,2,1)\n",
    "plt.boxplot(len_result)\n",
    "plt.subplot(1,2,2)\n",
    "plt.hist(len_result, bins=50)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## LSTM 분류 \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense, LSTM, Embedding\n",
    "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n",
    "import numpy as np\n",
    "\n",
    "M_test= np.array(M_test)\n",
    "max_len = 230\n",
    "X_train = pad_sequences(X_train, maxlen=max_len)\n",
    "X_test = pad_sequences(X_test, maxlen=max_len)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 2926 samples, validate on 2926 samples\n",
      "Epoch 1/3\n",
      "2880/2926 [============================>.] - ETA: 0s - loss: 0.6364 - acc: 0.6495\n",
      "Epoch 00001: val_acc improved from -inf to 0.73445, saving model to best_model.h5\n",
      "2926/2926 [==============================] - 26s 9ms/sample - loss: 0.6351 - acc: 0.6505 - val_loss: 0.5954 - val_acc: 0.7344\n",
      "Epoch 2/3\n",
      "2880/2926 [============================>.] - ETA: 0s - loss: 0.4949 - acc: 0.8358\n",
      "Epoch 00002: val_acc improved from 0.73445 to 0.78776, saving model to best_model.h5\n",
      "2926/2926 [==============================] - 26s 9ms/sample - loss: 0.4925 - acc: 0.8360 - val_loss: 0.4433 - val_acc: 0.7878\n",
      "Epoch 3/3\n",
      " 896/2926 [========>.....................] - ETA: 13s - loss: 0.2348 - acc: 0.9189WARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: loss,acc\n",
      "WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-25-2e866aa85c9f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     13\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'binary_crossentropy'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'adam'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'acc'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 15\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mY_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m64\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmc\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     16\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m    817\u001b[0m         \u001b[0mmax_queue_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    818\u001b[0m         \u001b[0mworkers\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m         use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[0;32m    820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    821\u001b[0m   def evaluate(self,\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m    340\u001b[0m                 \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    341\u001b[0m                 \u001b[0mtraining_context\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtraining_context\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 342\u001b[1;33m                 total_epochs=epochs)\n\u001b[0m\u001b[0;32m    343\u001b[0m             \u001b[0mcbks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake_logs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraining_result\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    344\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mrun_one_epoch\u001b[1;34m(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)\u001b[0m\n\u001b[0;32m    126\u001b[0m         step=step, mode=mode, size=current_batch_size) as batch_logs:\n\u001b[0;32m    127\u001b[0m       \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 128\u001b[1;33m         \u001b[0mbatch_outs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    129\u001b[0m       \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mStopIteration\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOutOfRangeError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    130\u001b[0m         \u001b[1;31m# TODO(kaftan): File bug about tf function and errors.OutOfRangeError?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2_utils.py\u001b[0m in \u001b[0;36mexecution_function\u001b[1;34m(input_fn)\u001b[0m\n\u001b[0;32m     96\u001b[0m     \u001b[1;31m# `numpy` translates Tensors to values in Eager mode.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     97\u001b[0m     return nest.map_structure(_non_none_constant_value,\n\u001b[1;32m---> 98\u001b[1;33m                               distributed_function(input_fn))\n\u001b[0m\u001b[0;32m     99\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    100\u001b[0m   \u001b[1;32mreturn\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m    566\u001b[0m         \u001b[0mxla_context\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    567\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 568\u001b[1;33m       \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    569\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    570\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m    597\u001b[0m       \u001b[1;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    598\u001b[0m       \u001b[1;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 599\u001b[1;33m       \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;31m# pylint: disable=not-callable\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    600\u001b[0m     \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    601\u001b[0m       \u001b[1;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   2361\u001b[0m     \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2362\u001b[0m       \u001b[0mgraph_function\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2363\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2364\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2365\u001b[0m   \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[1;34m(self, args, kwargs)\u001b[0m\n\u001b[0;32m   1609\u001b[0m          if isinstance(t, (ops.Tensor,\n\u001b[0;32m   1610\u001b[0m                            resource_variable_ops.BaseResourceVariable))),\n\u001b[1;32m-> 1611\u001b[1;33m         self.captured_inputs)\n\u001b[0m\u001b[0;32m   1612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1613\u001b[0m   \u001b[1;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m   1690\u001b[0m       \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1691\u001b[0m       return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1692\u001b[1;33m           ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m   1693\u001b[0m     forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m   1694\u001b[0m         \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m    543\u001b[0m               \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    544\u001b[0m               \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"executor_type\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexecutor_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"config_proto\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 545\u001b[1;33m               ctx=ctx)\n\u001b[0m\u001b[0;32m    546\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    547\u001b[0m           outputs = execute.execute_with_cancellation(\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m     59\u001b[0m     tensors = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,\n\u001b[0;32m     60\u001b[0m                                                \u001b[0mop_name\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mattrs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 61\u001b[1;33m                                                num_outputs)\n\u001b[0m\u001b[0;32m     62\u001b[0m   \u001b[1;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     63\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "model = Sequential()\n",
    "model.add(Embedding(15002, 120))\n",
    "model.add(LSTM(128))\n",
    "model.add(Dense(2, activation='sigmoid'))\n",
    "\n",
    "es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)\n",
    "mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)\n",
    "\n",
    "X_train = np.array(X_train)\n",
    "Y_train = np.array(Y_train)\n",
    "X_test = np.array(X_test)\n",
    "Y_test = np.array(Y_test)\n",
    "\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])\n",
    "model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=3, batch_size=64, callbacks=[es, mc])\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[9.89297211e-01, 1.10417316e-02],\n",
       "       [3.73128988e-02, 9.67208505e-01],\n",
       "       [9.98658895e-01, 1.62478851e-03],\n",
       "       [9.96271133e-01, 4.04474512e-03],\n",
       "       [8.88998568e-01, 1.09605283e-01],\n",
       "       [9.98210311e-01, 2.08193017e-03],\n",
       "       [3.54536772e-01, 6.69178307e-01],\n",
       "       [9.85074520e-01, 1.48036592e-02],\n",
       "       [1.29813954e-01, 8.76039565e-01],\n",
       "       [9.62996125e-01, 3.44542079e-02],\n",
       "       [7.55765080e-01, 2.49890730e-01],\n",
       "       [3.65469605e-01, 6.57850266e-01],\n",
       "       [9.98800039e-01, 1.51763496e-03],\n",
       "       [9.97327685e-01, 3.15122958e-03],\n",
       "       [5.75969875e-01, 4.37167883e-01],\n",
       "       [9.98559892e-01, 1.73308724e-03],\n",
       "       [9.83565748e-01, 1.57846343e-02],\n",
       "       [9.99194920e-01, 1.05460756e-03],\n",
       "       [9.97159958e-01, 3.05769313e-03],\n",
       "       [7.86324322e-01, 2.11838797e-01],\n",
       "       [9.94806945e-01, 5.56808943e-03],\n",
       "       [9.94939566e-01, 5.16507169e-03],\n",
       "       [9.95327115e-01, 5.44817653e-03],\n",
       "       [8.30366760e-02, 9.23830807e-01],\n",
       "       [9.99270737e-01, 9.99870244e-04],\n",
       "       [9.98326242e-01, 1.99769647e-03],\n",
       "       [2.01347172e-01, 8.05066824e-01],\n",
       "       [9.97681141e-01, 2.63667619e-03],\n",
       "       [6.56308457e-02, 9.39631820e-01],\n",
       "       [9.97786045e-01, 2.47393013e-03],\n",
       "       [9.95178938e-01, 4.89054574e-03],\n",
       "       [9.98978972e-01, 1.31971261e-03],\n",
       "       [9.96212244e-01, 3.93838761e-03],\n",
       "       [8.08125079e-01, 1.83967963e-01],\n",
       "       [9.86248910e-01, 1.37963891e-02],\n",
       "       [8.94822180e-01, 9.63397995e-02],\n",
       "       [4.63879943e-01, 5.36770880e-01],\n",
       "       [9.90485072e-01, 9.23611410e-03],\n",
       "       [9.98726070e-01, 1.49498892e-03],\n",
       "       [9.91048157e-01, 9.32559464e-03],\n",
       "       [3.64511549e-01, 6.21637642e-01],\n",
       "       [9.82374847e-01, 1.73149928e-02],\n",
       "       [9.93298471e-01, 6.45173620e-03],\n",
       "       [7.04595149e-01, 2.89463103e-01],\n",
       "       [9.93536115e-01, 6.79136021e-03],\n",
       "       [9.99129593e-01, 1.11387786e-03],\n",
       "       [9.06374276e-01, 8.96570683e-02],\n",
       "       [4.10660505e-01, 5.88741422e-01],\n",
       "       [9.99692917e-01, 4.83944750e-04],\n",
       "       [7.80944601e-02, 9.30601835e-01],\n",
       "       [4.43848312e-01, 5.59155643e-01],\n",
       "       [1.56105727e-01, 8.51845980e-01],\n",
       "       [9.82641697e-01, 1.64951906e-02],\n",
       "       [5.26238121e-02, 9.54043090e-01],\n",
       "       [9.80851293e-01, 1.90182012e-02],\n",
       "       [9.90199268e-01, 1.01825390e-02],\n",
       "       [8.12352419e-01, 1.85036004e-01],\n",
       "       [9.63351190e-01, 3.39603312e-02],\n",
       "       [6.31880701e-01, 3.64272743e-01],\n",
       "       [8.38387251e-01, 1.53711647e-01],\n",
       "       [9.76741195e-01, 2.20144410e-02],\n",
       "       [9.90605891e-01, 9.22481064e-03],\n",
       "       [9.98764873e-01, 1.51382165e-03],\n",
       "       [9.94506538e-01, 6.27602870e-03],\n",
       "       [9.97006834e-01, 3.38183786e-03],\n",
       "       [9.80250001e-01, 1.75612047e-02],\n",
       "       [8.35835814e-01, 1.54920161e-01],\n",
       "       [9.93885934e-01, 6.47321297e-03],\n",
       "       [7.32705653e-01, 2.38107413e-01],\n",
       "       [1.67823553e-01, 8.28654826e-01],\n",
       "       [9.97954011e-01, 2.44288007e-03],\n",
       "       [9.97217536e-01, 3.17463791e-03],\n",
       "       [9.98234391e-01, 2.21486646e-03],\n",
       "       [8.36275935e-01, 1.66395113e-01],\n",
       "       [9.97660041e-01, 2.73835333e-03],\n",
       "       [9.42627370e-01, 5.81915826e-02],\n",
       "       [4.82731223e-01, 5.22184551e-01],\n",
       "       [9.30466115e-01, 6.39836937e-02],\n",
       "       [5.61084867e-01, 4.41416740e-01],\n",
       "       [4.50993061e-01, 5.64331651e-01],\n",
       "       [1.45042473e-02, 9.87169504e-01],\n",
       "       [9.00123000e-01, 1.01546042e-01],\n",
       "       [3.05879086e-01, 7.04485118e-01],\n",
       "       [4.30749863e-01, 5.69399238e-01],\n",
       "       [1.44111022e-01, 8.66918087e-01],\n",
       "       [9.00533974e-01, 1.01281039e-01],\n",
       "       [5.68374395e-01, 4.33054060e-01],\n",
       "       [9.90041316e-01, 9.53172334e-03],\n",
       "       [9.87220049e-01, 1.34074753e-02],\n",
       "       [9.50743973e-01, 5.25186770e-02],\n",
       "       [2.19779104e-01, 7.77013421e-01],\n",
       "       [1.54326940e-02, 9.86147463e-01],\n",
       "       [9.98808622e-01, 1.44694082e-03],\n",
       "       [8.91158879e-01, 1.06756985e-01],\n",
       "       [1.05071880e-01, 8.98244858e-01],\n",
       "       [9.99034405e-01, 1.23475143e-03],\n",
       "       [9.94593918e-01, 5.76898688e-03],\n",
       "       [9.93712962e-01, 6.65295729e-03],\n",
       "       [7.02256203e-01, 2.97228634e-01],\n",
       "       [9.96766925e-01, 3.70637397e-03],\n",
       "       [9.89144385e-01, 1.11364387e-02],\n",
       "       [9.79650915e-01, 2.00015400e-02],\n",
       "       [9.93528545e-01, 6.56247372e-03],\n",
       "       [9.87190068e-01, 1.34675717e-02],\n",
       "       [9.87129986e-01, 1.17099285e-02],\n",
       "       [9.97617066e-01, 2.67686578e-03],\n",
       "       [9.97251093e-01, 3.04796081e-03],\n",
       "       [9.99035120e-01, 1.22244644e-03],\n",
       "       [9.54006970e-01, 4.89431918e-02],\n",
       "       [9.46570277e-01, 5.37833124e-02],\n",
       "       [9.43213701e-01, 5.26795760e-02],\n",
       "       [9.95134652e-01, 5.25745936e-03],\n",
       "       [8.25005472e-01, 1.69372976e-01],\n",
       "       [8.70788097e-01, 1.24156632e-01],\n",
       "       [9.97023284e-01, 3.26669309e-03],\n",
       "       [7.92557359e-01, 1.97209224e-01],\n",
       "       [9.98770893e-01, 1.56408502e-03],\n",
       "       [9.90018487e-01, 1.06498580e-02],\n",
       "       [9.90723550e-01, 8.96911696e-03],\n",
       "       [2.37598643e-01, 7.74325430e-01],\n",
       "       [6.24084949e-01, 3.58046830e-01],\n",
       "       [9.97389972e-01, 2.78766919e-03],\n",
       "       [9.84458208e-01, 1.46457935e-02],\n",
       "       [9.72309589e-01, 2.36137267e-02],\n",
       "       [9.98931468e-01, 1.41089316e-03],\n",
       "       [9.94718015e-01, 5.69924805e-03],\n",
       "       [4.72977042e-01, 5.49978912e-01],\n",
       "       [9.90615129e-01, 9.25194938e-03],\n",
       "       [3.34342234e-02, 9.68245149e-01],\n",
       "       [9.99083996e-01, 1.16300583e-03],\n",
       "       [9.44297254e-01, 5.47527708e-02],\n",
       "       [9.72299933e-01, 2.72788983e-02],\n",
       "       [9.93855774e-01, 6.32649660e-03],\n",
       "       [9.62590456e-01, 3.52819376e-02],\n",
       "       [9.79455829e-01, 1.92554407e-02],\n",
       "       [9.99165893e-01, 1.05177308e-03],\n",
       "       [9.85086679e-01, 1.40042715e-02],\n",
       "       [2.78653465e-02, 9.75346625e-01],\n",
       "       [9.98081207e-01, 2.21448927e-03],\n",
       "       [9.95232165e-01, 5.13720512e-03],\n",
       "       [8.88802350e-01, 1.11462571e-01],\n",
       "       [9.88667548e-01, 1.16618285e-02],\n",
       "       [9.98724878e-01, 1.53446209e-03],\n",
       "       [9.77607906e-01, 2.18318254e-02],\n",
       "       [9.96304035e-01, 4.18575248e-03],\n",
       "       [2.64609367e-01, 7.45012462e-01],\n",
       "       [9.72862959e-01, 2.69199926e-02],\n",
       "       [9.83956635e-01, 1.73143838e-02],\n",
       "       [9.98144031e-01, 2.16847914e-03],\n",
       "       [9.98341322e-01, 1.96177256e-03],\n",
       "       [9.79604721e-01, 1.93097256e-02],\n",
       "       [9.98944461e-01, 1.24808261e-03],\n",
       "       [8.86560798e-01, 1.12452798e-01],\n",
       "       [9.96718824e-01, 3.71032930e-03],\n",
       "       [2.21701801e-01, 7.80260146e-01],\n",
       "       [2.55418997e-02, 9.74908948e-01],\n",
       "       [9.99462545e-01, 7.41661002e-04],\n",
       "       [8.23981166e-01, 1.70397952e-01],\n",
       "       [6.52386248e-02, 9.39590633e-01],\n",
       "       [9.98122513e-01, 2.17202515e-03],\n",
       "       [7.12066889e-01, 2.86067158e-01],\n",
       "       [8.19861591e-02, 9.32014406e-01],\n",
       "       [9.58634377e-01, 3.82429920e-02],\n",
       "       [8.24238896e-01, 1.68227255e-01],\n",
       "       [9.98916030e-01, 1.41686620e-03],\n",
       "       [9.96592700e-01, 3.68587370e-03],\n",
       "       [7.33839869e-01, 2.57828295e-01],\n",
       "       [1.06705025e-01, 8.97602201e-01],\n",
       "       [3.80194485e-01, 6.08735800e-01],\n",
       "       [8.97293806e-01, 9.94179696e-02],\n",
       "       [9.10029948e-01, 8.81575868e-02],\n",
       "       [9.89466846e-01, 1.07162707e-02],\n",
       "       [1.63196921e-02, 9.86219883e-01],\n",
       "       [9.79962826e-01, 1.92297176e-02],\n",
       "       [9.99110639e-01, 1.11217669e-03],\n",
       "       [9.98811603e-01, 1.45246438e-03],\n",
       "       [9.10705030e-01, 8.23124573e-02],\n",
       "       [9.86083686e-01, 1.50616029e-02],\n",
       "       [9.94467974e-01, 5.80535224e-03],\n",
       "       [9.93595064e-01, 6.75331987e-03],\n",
       "       [9.99255478e-01, 9.77048301e-04],\n",
       "       [8.52798402e-01, 1.40539274e-01],\n",
       "       [1.64930165e-01, 8.44964683e-01],\n",
       "       [9.97384012e-01, 2.95930239e-03],\n",
       "       [9.96524751e-01, 3.76476254e-03],\n",
       "       [9.98174548e-01, 2.11151456e-03],\n",
       "       [6.01421416e-01, 3.98873955e-01],\n",
       "       [1.82097703e-01, 8.35628331e-01],\n",
       "       [9.97257769e-01, 3.08231893e-03],\n",
       "       [6.75210297e-01, 3.04572284e-01],\n",
       "       [9.53007460e-01, 4.57556657e-02],\n",
       "       [6.44749761e-01, 3.54564339e-01],\n",
       "       [9.99813974e-01, 2.97560182e-04],\n",
       "       [1.71126738e-01, 8.52856874e-01],\n",
       "       [9.67297077e-01, 3.14390585e-02],\n",
       "       [2.50648465e-02, 9.78119493e-01],\n",
       "       [7.36107171e-01, 2.60057181e-01],\n",
       "       [1.12051122e-01, 8.95975471e-01],\n",
       "       [9.98990476e-01, 1.27349573e-03],\n",
       "       [2.98825242e-02, 9.72770095e-01],\n",
       "       [9.96666729e-01, 3.56671633e-03],\n",
       "       [9.95280921e-01, 5.05312020e-03],\n",
       "       [4.67087999e-02, 9.55936015e-01],\n",
       "       [2.31040847e-02, 9.79663670e-01],\n",
       "       [9.92311239e-01, 7.36055616e-03],\n",
       "       [8.72887135e-01, 1.25496492e-01],\n",
       "       [9.83360767e-01, 1.60082821e-02],\n",
       "       [2.60338001e-02, 9.78127003e-01],\n",
       "       [9.79926884e-01, 1.79750286e-02],\n",
       "       [2.64777075e-02, 9.75348890e-01],\n",
       "       [8.47916961e-01, 1.61791548e-01],\n",
       "       [9.91084099e-01, 9.56200063e-03],\n",
       "       [9.97634172e-01, 2.64891679e-03],\n",
       "       [9.88843799e-01, 1.17526846e-02],\n",
       "       [9.99542236e-01, 6.39197184e-04],\n",
       "       [9.99526381e-01, 6.73291273e-04],\n",
       "       [9.89566207e-01, 1.05931843e-02],\n",
       "       [9.84892607e-01, 1.59311239e-02],\n",
       "       [9.47403908e-01, 5.09829447e-02],\n",
       "       [9.97905850e-01, 2.34915246e-03],\n",
       "       [3.12148798e-02, 9.70876515e-01],\n",
       "       [3.71371321e-02, 9.64185297e-01],\n",
       "       [2.86366083e-02, 9.73299086e-01],\n",
       "       [9.98304725e-01, 1.95303746e-03],\n",
       "       [5.43259323e-01, 4.45621550e-01],\n",
       "       [9.95735526e-01, 4.47552558e-03],\n",
       "       [9.99664187e-01, 4.84757824e-04],\n",
       "       [4.77395892e-01, 5.53484499e-01],\n",
       "       [2.67982967e-02, 9.75562334e-01],\n",
       "       [5.11212230e-01, 4.82527465e-01],\n",
       "       [9.96845663e-01, 3.34183313e-03],\n",
       "       [9.98655796e-01, 1.58252590e-03],\n",
       "       [9.98682797e-01, 1.57597498e-03],\n",
       "       [9.97531652e-01, 2.68543395e-03],\n",
       "       [8.89724314e-01, 1.02902323e-01],\n",
       "       [9.96919513e-01, 3.27811856e-03],\n",
       "       [9.76353884e-01, 2.36234013e-02],\n",
       "       [9.98018861e-01, 2.27649114e-03],\n",
       "       [2.69563273e-02, 9.76159155e-01],\n",
       "       [6.68949783e-02, 9.39164698e-01],\n",
       "       [9.73651588e-01, 2.52439454e-02],\n",
       "       [9.98613954e-01, 1.62094866e-03],\n",
       "       [9.44671273e-01, 5.45767955e-02],\n",
       "       [4.35869396e-02, 9.60251033e-01],\n",
       "       [1.88457757e-01, 8.20301712e-01],\n",
       "       [2.59413511e-01, 7.33782709e-01],\n",
       "       [5.41639701e-02, 9.47131872e-01],\n",
       "       [9.19235591e-03, 9.91685688e-01],\n",
       "       [9.94127393e-01, 5.99407079e-03],\n",
       "       [9.86760855e-01, 1.31814247e-02],\n",
       "       [9.78301644e-01, 2.23909188e-02],\n",
       "       [9.95761573e-01, 4.44441987e-03],\n",
       "       [9.99497414e-01, 7.02120829e-04],\n",
       "       [9.97229755e-01, 2.99413246e-03],\n",
       "       [2.33508293e-02, 9.80765402e-01],\n",
       "       [9.94520366e-01, 6.04913617e-03],\n",
       "       [9.97494936e-01, 2.87344446e-03],\n",
       "       [9.98973131e-01, 1.26466772e-03],\n",
       "       [4.97004330e-01, 4.97818023e-01],\n",
       "       [9.82652664e-01, 1.84881184e-02],\n",
       "       [1.31787378e-02, 9.88159657e-01],\n",
       "       [3.66922885e-01, 6.31668866e-01],\n",
       "       [9.96259332e-01, 4.08321712e-03],\n",
       "       [9.98756170e-01, 1.47667748e-03],\n",
       "       [7.72360861e-02, 9.27823484e-01],\n",
       "       [9.98449206e-01, 1.86312699e-03],\n",
       "       [3.15861739e-02, 9.71484125e-01],\n",
       "       [9.96845782e-01, 3.44880344e-03],\n",
       "       [7.42987245e-02, 9.30592895e-01],\n",
       "       [2.09757891e-02, 9.81689215e-01],\n",
       "       [9.98074651e-01, 2.20919168e-03],\n",
       "       [6.46533668e-01, 3.48830134e-01],\n",
       "       [5.98557949e-01, 3.84560645e-01],\n",
       "       [9.99404311e-01, 7.86028046e-04],\n",
       "       [9.98620272e-01, 1.63661852e-03],\n",
       "       [9.99131620e-01, 1.08906056e-03],\n",
       "       [3.69940363e-02, 9.68174577e-01],\n",
       "       [9.98901844e-01, 1.32231705e-03],\n",
       "       [9.85736310e-01, 1.49741964e-02],\n",
       "       [6.43389583e-01, 3.48876834e-01],\n",
       "       [4.78291005e-01, 5.33889949e-01],\n",
       "       [9.99354661e-01, 8.77332001e-04],\n",
       "       [5.98759837e-02, 9.42334712e-01],\n",
       "       [9.63255167e-01, 3.59756313e-02],\n",
       "       [2.05777939e-02, 9.81795371e-01],\n",
       "       [8.92112032e-02, 9.14921701e-01],\n",
       "       [9.96331871e-01, 4.17977013e-03],\n",
       "       [5.23199812e-02, 9.52406943e-01],\n",
       "       [9.88084376e-01, 1.19793927e-02],\n",
       "       [9.69554842e-01, 2.73279194e-02],\n",
       "       [3.67223620e-01, 6.27295792e-01],\n",
       "       [9.16537941e-01, 8.18802044e-02],\n",
       "       [9.99439061e-01, 7.88773061e-04],\n",
       "       [2.48765163e-02, 9.77342725e-01],\n",
       "       [9.96949136e-01, 3.68632539e-03],\n",
       "       [6.32767797e-01, 3.68138403e-01],\n",
       "       [9.95043516e-01, 5.14345011e-03],\n",
       "       [9.93900299e-01, 6.20957604e-03],\n",
       "       [9.97990847e-01, 2.28208210e-03],\n",
       "       [8.38533640e-02, 9.23523605e-01],\n",
       "       [9.93036926e-01, 6.87335012e-03],\n",
       "       [9.56128418e-01, 4.12139818e-02],\n",
       "       [9.56128418e-01, 4.12139818e-02],\n",
       "       [1.26801789e-01, 8.76263022e-01],\n",
       "       [2.42131352e-01, 7.80837834e-01],\n",
       "       [8.39236677e-01, 1.56546742e-01],\n",
       "       [9.98509943e-01, 1.75377598e-03]], dtype=float32)"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "M_test = pad_sequences(M_test, maxlen=max_len)\n",
    "\n",
    "predictions = model.predict(M_test)\n",
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[9.89297211e-01, 1.10417316e-02],\n",
       "       [3.73128988e-02, 9.67208505e-01],\n",
       "       [9.98658895e-01, 1.62478851e-03],\n",
       "       [9.96271133e-01, 4.04474512e-03],\n",
       "       [8.88998568e-01, 1.09605283e-01],\n",
       "       [9.98210311e-01, 2.08193017e-03],\n",
       "       [3.54536772e-01, 6.69178307e-01],\n",
       "       [9.85074520e-01, 1.48036592e-02],\n",
       "       [1.29813954e-01, 8.76039565e-01],\n",
       "       [9.62996125e-01, 3.44542079e-02],\n",
       "       [7.55765080e-01, 2.49890730e-01],\n",
       "       [3.65469605e-01, 6.57850266e-01],\n",
       "       [9.98800039e-01, 1.51763496e-03],\n",
       "       [9.97327685e-01, 3.15122958e-03],\n",
       "       [5.75969875e-01, 4.37167883e-01],\n",
       "       [9.98559892e-01, 1.73308724e-03],\n",
       "       [9.83565748e-01, 1.57846343e-02],\n",
       "       [9.99194920e-01, 1.05460756e-03],\n",
       "       [9.97159958e-01, 3.05769313e-03],\n",
       "       [7.86324322e-01, 2.11838797e-01],\n",
       "       [9.94806945e-01, 5.56808943e-03],\n",
       "       [9.94939566e-01, 5.16507169e-03],\n",
       "       [9.95327115e-01, 5.44817653e-03],\n",
       "       [8.30366760e-02, 9.23830807e-01],\n",
       "       [9.99270737e-01, 9.99870244e-04],\n",
       "       [9.98326242e-01, 1.99769647e-03],\n",
       "       [2.01347172e-01, 8.05066824e-01],\n",
       "       [9.97681141e-01, 2.63667619e-03],\n",
       "       [6.56308457e-02, 9.39631820e-01],\n",
       "       [9.97786045e-01, 2.47393013e-03],\n",
       "       [9.95178938e-01, 4.89054574e-03],\n",
       "       [9.98978972e-01, 1.31971261e-03],\n",
       "       [9.96212244e-01, 3.93838761e-03],\n",
       "       [8.08125079e-01, 1.83967963e-01],\n",
       "       [9.86248910e-01, 1.37963891e-02],\n",
       "       [8.94822180e-01, 9.63397995e-02],\n",
       "       [4.63879943e-01, 5.36770880e-01],\n",
       "       [9.90485072e-01, 9.23611410e-03],\n",
       "       [9.98726070e-01, 1.49498892e-03],\n",
       "       [9.91048157e-01, 9.32559464e-03],\n",
       "       [3.64511549e-01, 6.21637642e-01],\n",
       "       [9.82374847e-01, 1.73149928e-02],\n",
       "       [9.93298471e-01, 6.45173620e-03],\n",
       "       [7.04595149e-01, 2.89463103e-01],\n",
       "       [9.93536115e-01, 6.79136021e-03],\n",
       "       [9.99129593e-01, 1.11387786e-03],\n",
       "       [9.06374276e-01, 8.96570683e-02],\n",
       "       [4.10660505e-01, 5.88741422e-01],\n",
       "       [9.99692917e-01, 4.83944750e-04],\n",
       "       [7.80944601e-02, 9.30601835e-01],\n",
       "       [4.43848312e-01, 5.59155643e-01],\n",
       "       [1.56105727e-01, 8.51845980e-01],\n",
       "       [9.82641697e-01, 1.64951906e-02],\n",
       "       [5.26238121e-02, 9.54043090e-01],\n",
       "       [9.80851293e-01, 1.90182012e-02],\n",
       "       [9.90199268e-01, 1.01825390e-02],\n",
       "       [8.12352419e-01, 1.85036004e-01],\n",
       "       [9.63351190e-01, 3.39603312e-02],\n",
       "       [6.31880701e-01, 3.64272743e-01],\n",
       "       [8.38387251e-01, 1.53711647e-01],\n",
       "       [9.76741195e-01, 2.20144410e-02],\n",
       "       [9.90605891e-01, 9.22481064e-03],\n",
       "       [9.98764873e-01, 1.51382165e-03],\n",
       "       [9.94506538e-01, 6.27602870e-03],\n",
       "       [9.97006834e-01, 3.38183786e-03],\n",
       "       [9.80250001e-01, 1.75612047e-02],\n",
       "       [8.35835814e-01, 1.54920161e-01],\n",
       "       [9.93885934e-01, 6.47321297e-03],\n",
       "       [7.32705653e-01, 2.38107413e-01],\n",
       "       [1.67823553e-01, 8.28654826e-01],\n",
       "       [9.97954011e-01, 2.44288007e-03],\n",
       "       [9.97217536e-01, 3.17463791e-03],\n",
       "       [9.98234391e-01, 2.21486646e-03],\n",
       "       [8.36275935e-01, 1.66395113e-01],\n",
       "       [9.97660041e-01, 2.73835333e-03],\n",
       "       [9.42627370e-01, 5.81915826e-02],\n",
       "       [4.82731223e-01, 5.22184551e-01],\n",
       "       [9.30466115e-01, 6.39836937e-02],\n",
       "       [5.61084867e-01, 4.41416740e-01],\n",
       "       [4.50993061e-01, 5.64331651e-01],\n",
       "       [1.45042473e-02, 9.87169504e-01],\n",
       "       [9.00123000e-01, 1.01546042e-01],\n",
       "       [3.05879086e-01, 7.04485118e-01],\n",
       "       [4.30749863e-01, 5.69399238e-01],\n",
       "       [1.44111022e-01, 8.66918087e-01],\n",
       "       [9.00533974e-01, 1.01281039e-01],\n",
       "       [5.68374395e-01, 4.33054060e-01],\n",
       "       [9.90041316e-01, 9.53172334e-03],\n",
       "       [9.87220049e-01, 1.34074753e-02],\n",
       "       [9.50743973e-01, 5.25186770e-02],\n",
       "       [2.19779104e-01, 7.77013421e-01],\n",
       "       [1.54326940e-02, 9.86147463e-01],\n",
       "       [9.98808622e-01, 1.44694082e-03],\n",
       "       [8.91158879e-01, 1.06756985e-01],\n",
       "       [1.05071880e-01, 8.98244858e-01],\n",
       "       [9.99034405e-01, 1.23475143e-03],\n",
       "       [9.94593918e-01, 5.76898688e-03],\n",
       "       [9.93712962e-01, 6.65295729e-03],\n",
       "       [7.02256203e-01, 2.97228634e-01],\n",
       "       [9.96766925e-01, 3.70637397e-03],\n",
       "       [9.89144385e-01, 1.11364387e-02],\n",
       "       [9.79650915e-01, 2.00015400e-02],\n",
       "       [9.93528545e-01, 6.56247372e-03],\n",
       "       [9.87190068e-01, 1.34675717e-02],\n",
       "       [9.87129986e-01, 1.17099285e-02],\n",
       "       [9.97617066e-01, 2.67686578e-03],\n",
       "       [9.97251093e-01, 3.04796081e-03],\n",
       "       [9.99035120e-01, 1.22244644e-03],\n",
       "       [9.54006970e-01, 4.89431918e-02],\n",
       "       [9.46570277e-01, 5.37833124e-02],\n",
       "       [9.43213701e-01, 5.26795760e-02],\n",
       "       [9.95134652e-01, 5.25745936e-03],\n",
       "       [8.25005472e-01, 1.69372976e-01],\n",
       "       [8.70788097e-01, 1.24156632e-01],\n",
       "       [9.97023284e-01, 3.26669309e-03],\n",
       "       [7.92557359e-01, 1.97209224e-01],\n",
       "       [9.98770893e-01, 1.56408502e-03],\n",
       "       [9.90018487e-01, 1.06498580e-02],\n",
       "       [9.90723550e-01, 8.96911696e-03],\n",
       "       [2.37598643e-01, 7.74325430e-01],\n",
       "       [6.24084949e-01, 3.58046830e-01],\n",
       "       [9.97389972e-01, 2.78766919e-03],\n",
       "       [9.84458208e-01, 1.46457935e-02],\n",
       "       [9.72309589e-01, 2.36137267e-02],\n",
       "       [9.98931468e-01, 1.41089316e-03],\n",
       "       [9.94718015e-01, 5.69924805e-03],\n",
       "       [4.72977042e-01, 5.49978912e-01],\n",
       "       [9.90615129e-01, 9.25194938e-03],\n",
       "       [3.34342234e-02, 9.68245149e-01],\n",
       "       [9.99083996e-01, 1.16300583e-03],\n",
       "       [9.44297254e-01, 5.47527708e-02],\n",
       "       [9.72299933e-01, 2.72788983e-02],\n",
       "       [9.93855774e-01, 6.32649660e-03],\n",
       "       [9.62590456e-01, 3.52819376e-02],\n",
       "       [9.79455829e-01, 1.92554407e-02],\n",
       "       [9.99165893e-01, 1.05177308e-03],\n",
       "       [9.85086679e-01, 1.40042715e-02],\n",
       "       [2.78653465e-02, 9.75346625e-01],\n",
       "       [9.98081207e-01, 2.21448927e-03],\n",
       "       [9.95232165e-01, 5.13720512e-03],\n",
       "       [8.88802350e-01, 1.11462571e-01],\n",
       "       [9.88667548e-01, 1.16618285e-02],\n",
       "       [9.98724878e-01, 1.53446209e-03],\n",
       "       [9.77607906e-01, 2.18318254e-02],\n",
       "       [9.96304035e-01, 4.18575248e-03],\n",
       "       [2.64609367e-01, 7.45012462e-01],\n",
       "       [9.72862959e-01, 2.69199926e-02],\n",
       "       [9.83956635e-01, 1.73143838e-02],\n",
       "       [9.98144031e-01, 2.16847914e-03],\n",
       "       [9.98341322e-01, 1.96177256e-03],\n",
       "       [9.79604721e-01, 1.93097256e-02],\n",
       "       [9.98944461e-01, 1.24808261e-03],\n",
       "       [8.86560798e-01, 1.12452798e-01],\n",
       "       [9.96718824e-01, 3.71032930e-03],\n",
       "       [2.21701801e-01, 7.80260146e-01],\n",
       "       [2.55418997e-02, 9.74908948e-01],\n",
       "       [9.99462545e-01, 7.41661002e-04],\n",
       "       [8.23981166e-01, 1.70397952e-01],\n",
       "       [6.52386248e-02, 9.39590633e-01],\n",
       "       [9.98122513e-01, 2.17202515e-03],\n",
       "       [7.12066889e-01, 2.86067158e-01],\n",
       "       [8.19861591e-02, 9.32014406e-01],\n",
       "       [9.58634377e-01, 3.82429920e-02],\n",
       "       [8.24238896e-01, 1.68227255e-01],\n",
       "       [9.98916030e-01, 1.41686620e-03],\n",
       "       [9.96592700e-01, 3.68587370e-03],\n",
       "       [7.33839869e-01, 2.57828295e-01],\n",
       "       [1.06705025e-01, 8.97602201e-01],\n",
       "       [3.80194485e-01, 6.08735800e-01],\n",
       "       [8.97293806e-01, 9.94179696e-02],\n",
       "       [9.10029948e-01, 8.81575868e-02],\n",
       "       [9.89466846e-01, 1.07162707e-02],\n",
       "       [1.63196921e-02, 9.86219883e-01],\n",
       "       [9.79962826e-01, 1.92297176e-02],\n",
       "       [9.99110639e-01, 1.11217669e-03],\n",
       "       [9.98811603e-01, 1.45246438e-03],\n",
       "       [9.10705030e-01, 8.23124573e-02],\n",
       "       [9.86083686e-01, 1.50616029e-02],\n",
       "       [9.94467974e-01, 5.80535224e-03],\n",
       "       [9.93595064e-01, 6.75331987e-03],\n",
       "       [9.99255478e-01, 9.77048301e-04],\n",
       "       [8.52798402e-01, 1.40539274e-01],\n",
       "       [1.64930165e-01, 8.44964683e-01],\n",
       "       [9.97384012e-01, 2.95930239e-03],\n",
       "       [9.96524751e-01, 3.76476254e-03],\n",
       "       [9.98174548e-01, 2.11151456e-03],\n",
       "       [6.01421416e-01, 3.98873955e-01],\n",
       "       [1.82097703e-01, 8.35628331e-01],\n",
       "       [9.97257769e-01, 3.08231893e-03],\n",
       "       [6.75210297e-01, 3.04572284e-01],\n",
       "       [9.53007460e-01, 4.57556657e-02],\n",
       "       [6.44749761e-01, 3.54564339e-01],\n",
       "       [9.99813974e-01, 2.97560182e-04],\n",
       "       [1.71126738e-01, 8.52856874e-01],\n",
       "       [9.67297077e-01, 3.14390585e-02],\n",
       "       [2.50648465e-02, 9.78119493e-01],\n",
       "       [7.36107171e-01, 2.60057181e-01],\n",
       "       [1.12051122e-01, 8.95975471e-01],\n",
       "       [9.98990476e-01, 1.27349573e-03],\n",
       "       [2.98825242e-02, 9.72770095e-01],\n",
       "       [9.96666729e-01, 3.56671633e-03],\n",
       "       [9.95280921e-01, 5.05312020e-03],\n",
       "       [4.67087999e-02, 9.55936015e-01],\n",
       "       [2.31040847e-02, 9.79663670e-01],\n",
       "       [9.92311239e-01, 7.36055616e-03],\n",
       "       [8.72887135e-01, 1.25496492e-01],\n",
       "       [9.83360767e-01, 1.60082821e-02],\n",
       "       [2.60338001e-02, 9.78127003e-01],\n",
       "       [9.79926884e-01, 1.79750286e-02],\n",
       "       [2.64777075e-02, 9.75348890e-01],\n",
       "       [8.47916961e-01, 1.61791548e-01],\n",
       "       [9.91084099e-01, 9.56200063e-03],\n",
       "       [9.97634172e-01, 2.64891679e-03],\n",
       "       [9.88843799e-01, 1.17526846e-02],\n",
       "       [9.99542236e-01, 6.39197184e-04],\n",
       "       [9.99526381e-01, 6.73291273e-04],\n",
       "       [9.89566207e-01, 1.05931843e-02],\n",
       "       [9.84892607e-01, 1.59311239e-02],\n",
       "       [9.47403908e-01, 5.09829447e-02],\n",
       "       [9.97905850e-01, 2.34915246e-03],\n",
       "       [3.12148798e-02, 9.70876515e-01],\n",
       "       [3.71371321e-02, 9.64185297e-01],\n",
       "       [2.86366083e-02, 9.73299086e-01],\n",
       "       [9.98304725e-01, 1.95303746e-03],\n",
       "       [5.43259323e-01, 4.45621550e-01],\n",
       "       [9.95735526e-01, 4.47552558e-03],\n",
       "       [9.99664187e-01, 4.84757824e-04],\n",
       "       [4.77395892e-01, 5.53484499e-01],\n",
       "       [2.67982967e-02, 9.75562334e-01],\n",
       "       [5.11212230e-01, 4.82527465e-01],\n",
       "       [9.96845663e-01, 3.34183313e-03],\n",
       "       [9.98655796e-01, 1.58252590e-03],\n",
       "       [9.98682797e-01, 1.57597498e-03],\n",
       "       [9.97531652e-01, 2.68543395e-03],\n",
       "       [8.89724314e-01, 1.02902323e-01],\n",
       "       [9.96919513e-01, 3.27811856e-03],\n",
       "       [9.76353884e-01, 2.36234013e-02],\n",
       "       [9.98018861e-01, 2.27649114e-03],\n",
       "       [2.69563273e-02, 9.76159155e-01],\n",
       "       [6.68949783e-02, 9.39164698e-01],\n",
       "       [9.73651588e-01, 2.52439454e-02],\n",
       "       [9.98613954e-01, 1.62094866e-03],\n",
       "       [9.44671273e-01, 5.45767955e-02],\n",
       "       [4.35869396e-02, 9.60251033e-01],\n",
       "       [1.88457757e-01, 8.20301712e-01],\n",
       "       [2.59413511e-01, 7.33782709e-01],\n",
       "       [5.41639701e-02, 9.47131872e-01],\n",
       "       [9.19235591e-03, 9.91685688e-01],\n",
       "       [9.94127393e-01, 5.99407079e-03],\n",
       "       [9.86760855e-01, 1.31814247e-02],\n",
       "       [9.78301644e-01, 2.23909188e-02],\n",
       "       [9.95761573e-01, 4.44441987e-03],\n",
       "       [9.99497414e-01, 7.02120829e-04],\n",
       "       [9.97229755e-01, 2.99413246e-03],\n",
       "       [2.33508293e-02, 9.80765402e-01],\n",
       "       [9.94520366e-01, 6.04913617e-03],\n",
       "       [9.97494936e-01, 2.87344446e-03],\n",
       "       [9.98973131e-01, 1.26466772e-03],\n",
       "       [4.97004330e-01, 4.97818023e-01],\n",
       "       [9.82652664e-01, 1.84881184e-02],\n",
       "       [1.31787378e-02, 9.88159657e-01],\n",
       "       [3.66922885e-01, 6.31668866e-01],\n",
       "       [9.96259332e-01, 4.08321712e-03],\n",
       "       [9.98756170e-01, 1.47667748e-03],\n",
       "       [7.72360861e-02, 9.27823484e-01],\n",
       "       [9.98449206e-01, 1.86312699e-03],\n",
       "       [3.15861739e-02, 9.71484125e-01],\n",
       "       [9.96845782e-01, 3.44880344e-03],\n",
       "       [7.42987245e-02, 9.30592895e-01],\n",
       "       [2.09757891e-02, 9.81689215e-01],\n",
       "       [9.98074651e-01, 2.20919168e-03],\n",
       "       [6.46533668e-01, 3.48830134e-01],\n",
       "       [5.98557949e-01, 3.84560645e-01],\n",
       "       [9.99404311e-01, 7.86028046e-04],\n",
       "       [9.98620272e-01, 1.63661852e-03],\n",
       "       [9.99131620e-01, 1.08906056e-03],\n",
       "       [3.69940363e-02, 9.68174577e-01],\n",
       "       [9.98901844e-01, 1.32231705e-03],\n",
       "       [9.85736310e-01, 1.49741964e-02],\n",
       "       [6.43389583e-01, 3.48876834e-01],\n",
       "       [4.78291005e-01, 5.33889949e-01],\n",
       "       [9.99354661e-01, 8.77332001e-04],\n",
       "       [5.98759837e-02, 9.42334712e-01],\n",
       "       [9.63255167e-01, 3.59756313e-02],\n",
       "       [2.05777939e-02, 9.81795371e-01],\n",
       "       [8.92112032e-02, 9.14921701e-01],\n",
       "       [9.96331871e-01, 4.17977013e-03],\n",
       "       [5.23199812e-02, 9.52406943e-01],\n",
       "       [9.88084376e-01, 1.19793927e-02],\n",
       "       [9.69554842e-01, 2.73279194e-02],\n",
       "       [3.67223620e-01, 6.27295792e-01],\n",
       "       [9.16537941e-01, 8.18802044e-02],\n",
       "       [9.99439061e-01, 7.88773061e-04],\n",
       "       [2.48765163e-02, 9.77342725e-01],\n",
       "       [9.96949136e-01, 3.68632539e-03],\n",
       "       [6.32767797e-01, 3.68138403e-01],\n",
       "       [9.95043516e-01, 5.14345011e-03],\n",
       "       [9.93900299e-01, 6.20957604e-03],\n",
       "       [9.97990847e-01, 2.28208210e-03],\n",
       "       [8.38533640e-02, 9.23523605e-01],\n",
       "       [9.93036926e-01, 6.87335012e-03],\n",
       "       [9.56128418e-01, 4.12139818e-02],\n",
       "       [9.56128418e-01, 4.12139818e-02],\n",
       "       [1.26801789e-01, 8.76263022e-01],\n",
       "       [2.42131352e-01, 7.80837834e-01],\n",
       "       [8.39236677e-01, 1.56546742e-01],\n",
       "       [9.98509943e-01, 1.75377598e-03]], dtype=float32)"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "스릴러일 확률 (1)  0.2028985507246377\n",
      "로맨스일 확률 (0)  0.7971014492753623\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "predict_labels = np.argmax(predictions, axis = 1)\n",
    "th_tot = 0\n",
    "ro_tot = 0\n",
    "for i in range(69):\n",
    "    if(predict_labels[i]==1):\n",
    "        th_tot += 1\n",
    "    else:\n",
    "        ro_tot += 1\n",
    "print(\"스릴러일 확률 (1) \" , th_tot / 69)\n",
    "print(\"로맨스일 확률 (0) \" , ro_tot / 69)\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 판타지 VS 사회"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "줄거리 최대 길이 :  2324\n",
      "줄거리 평균 길이 :  275.97285714285715\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#판타지 사회\n",
    "FS_X_train = []\n",
    "FS_Y_train = [] \n",
    "\n",
    "FStrain_data_size = 1400//2\n",
    "\n",
    "for i in range(FStrain_data_size):\n",
    "    FS_X_train.append(SC_encoded[i])\n",
    "    FS_Y_train.append([1,0])\n",
    "    FS_X_train.append(FN_encoded[i])\n",
    "    FS_Y_train.append([0,1])\n",
    "    \n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "len_result = [len(s) for s in FS_X_train]\n",
    "print(\"줄거리 최대 길이 : \",max(len_result))\n",
    "print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
    "\n",
    "plt.subplot(1,2,1)\n",
    "plt.boxplot(len_result)\n",
    "plt.subplot(1,2,2)\n",
    "plt.hist(len_result, bins=50)\n",
    "plt.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "줄거리 최대 길이 :  1336\n",
      "줄거리 평균 길이 :  193.3892857142857\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "FS_X_test = []\n",
    "FS_Y_test = []  #0 : romance, 1 : thriller \n",
    "\n",
    "for i in range(FStrain_data_size):\n",
    "    FS_X_test.append(SC_encoded[FStrain_data_size+i])\n",
    "    FS_Y_test.append([1,0])\n",
    "    FS_X_test.append(FN_encoded[FStrain_data_size+i])\n",
    "    FS_Y_test.append([0,1])\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "len_result = [len(s) for s in FS_X_test]\n",
    "print(\"줄거리 최대 길이 : \",max(len_result))\n",
    "print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
    "\n",
    "plt.subplot(1,2,1)\n",
    "plt.boxplot(len_result)\n",
    "plt.subplot(1,2,2)\n",
    "plt.hist(len_result, bins=50)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_len = 230\n",
    "\n",
    "FS_X_train = pad_sequences(FS_X_train, maxlen=max_len)\n",
    "FS_X_test = pad_sequences(FS_X_test, maxlen=max_len)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1400 samples, validate on 1400 samples\n",
      "Epoch 1/3\n",
      "  64/1400 [>.............................] - ETA: 27sWARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: \n",
      "WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n"
     ]
    },
    {
     "ename": "InvalidArgumentError",
     "evalue": " indices[54,195] = 8646 is not in [0, 5002)\n\t [[node sequential_6/embedding_6/embedding_lookup (defined at <ipython-input-44-4c2962189ab4>:16) ]] [Op:__inference_distributed_function_13289]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_6/embedding_6/embedding_lookup:\n sequential_6/embedding_6/embedding_lookup/12144 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mInvalidArgumentError\u001b[0m                      Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-44-4c2962189ab4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     15\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'binary_crossentropy'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'adam'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'acc'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFS_X_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mFS_Y_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFS_X_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mFS_Y_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m64\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmc\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m    817\u001b[0m         \u001b[0mmax_queue_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    818\u001b[0m         \u001b[0mworkers\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m         use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[0;32m    820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    821\u001b[0m   def evaluate(self,\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m    340\u001b[0m                 \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    341\u001b[0m                 \u001b[0mtraining_context\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtraining_context\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 342\u001b[1;33m                 total_epochs=epochs)\n\u001b[0m\u001b[0;32m    343\u001b[0m             \u001b[0mcbks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake_logs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraining_result\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    344\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mrun_one_epoch\u001b[1;34m(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)\u001b[0m\n\u001b[0;32m    126\u001b[0m         step=step, mode=mode, size=current_batch_size) as batch_logs:\n\u001b[0;32m    127\u001b[0m       \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 128\u001b[1;33m         \u001b[0mbatch_outs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    129\u001b[0m       \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mStopIteration\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOutOfRangeError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    130\u001b[0m         \u001b[1;31m# TODO(kaftan): File bug about tf function and errors.OutOfRangeError?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2_utils.py\u001b[0m in \u001b[0;36mexecution_function\u001b[1;34m(input_fn)\u001b[0m\n\u001b[0;32m     96\u001b[0m     \u001b[1;31m# `numpy` translates Tensors to values in Eager mode.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     97\u001b[0m     return nest.map_structure(_non_none_constant_value,\n\u001b[1;32m---> 98\u001b[1;33m                               distributed_function(input_fn))\n\u001b[0m\u001b[0;32m     99\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    100\u001b[0m   \u001b[1;32mreturn\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m    566\u001b[0m         \u001b[0mxla_context\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    567\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 568\u001b[1;33m       \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    569\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    570\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m    630\u001b[0m         \u001b[1;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    631\u001b[0m         \u001b[1;31m# stateless function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 632\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    633\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    634\u001b[0m       \u001b[0mcanon_args\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcanon_kwds\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   2361\u001b[0m     \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2362\u001b[0m       \u001b[0mgraph_function\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2363\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2364\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2365\u001b[0m   \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[1;34m(self, args, kwargs)\u001b[0m\n\u001b[0;32m   1609\u001b[0m          if isinstance(t, (ops.Tensor,\n\u001b[0;32m   1610\u001b[0m                            resource_variable_ops.BaseResourceVariable))),\n\u001b[1;32m-> 1611\u001b[1;33m         self.captured_inputs)\n\u001b[0m\u001b[0;32m   1612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1613\u001b[0m   \u001b[1;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m   1690\u001b[0m       \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1691\u001b[0m       return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1692\u001b[1;33m           ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m   1693\u001b[0m     forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m   1694\u001b[0m         \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m    543\u001b[0m               \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    544\u001b[0m               \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"executor_type\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexecutor_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"config_proto\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 545\u001b[1;33m               ctx=ctx)\n\u001b[0m\u001b[0;32m    546\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    547\u001b[0m           outputs = execute.execute_with_cancellation(\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m     65\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     66\u001b[0m       \u001b[0mmessage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 67\u001b[1;33m     \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_status_to_exception\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     68\u001b[0m   \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     69\u001b[0m     keras_symbolic_tensors = [\n",
      "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\six.py\u001b[0m in \u001b[0;36mraise_from\u001b[1;34m(value, from_value)\u001b[0m\n",
      "\u001b[1;31mInvalidArgumentError\u001b[0m:  indices[54,195] = 8646 is not in [0, 5002)\n\t [[node sequential_6/embedding_6/embedding_lookup (defined at <ipython-input-44-4c2962189ab4>:16) ]] [Op:__inference_distributed_function_13289]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_6/embedding_6/embedding_lookup:\n sequential_6/embedding_6/embedding_lookup/12144 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n"
     ]
    }
   ],
   "source": [
    "model = Sequential()\n",
    "model.add(Embedding(5002, 120))\n",
    "model.add(LSTM(120))\n",
    "model.add(Dense(2, activation='sigmoid'))\n",
    "\n",
    "es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)\n",
    "mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)\n",
    "\n",
    "FS_X_train = np.array(FS_X_train)\n",
    "FS_X_test = np.array(FS_X_test)\n",
    "FS_Y_train = np.array(FS_Y_train)\n",
    "FS_Y_test = np.array(FS_Y_test)\n",
    "\n",
    "\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])\n",
    "model.fit(FS_X_train, FS_Y_train, validation_data=(FS_X_test, FS_Y_test), epochs=3, batch_size=64, callbacks=[es, mc])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 위에서 실행한 베스트 모델로 뮤지컬 예측값 확인하기"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 드라마 VS 역사"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "줄거리 최대 길이 :  1389\n",
      "줄거리 평균 길이 :  258.6014285714286\n"
     ]
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "DH_X_train = []\n",
    "DH_Y_train = [] \n",
    "\n",
    "DHtrain_data_size = 1400//2\n",
    "\n",
    "for i in range(DHtrain_data_size):\n",
    "    DH_X_train.append(DR_encoded[i])\n",
    "    DH_Y_train.append([1,0])\n",
    "    DH_X_train.append(HS_encoded[i])\n",
    "    DH_Y_train.append([0,1])\n",
    "    \n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "len_result = [len(s) for s in DH_X_train]\n",
    "print(\"줄거리 최대 길이 : \",max(len_result))\n",
    "print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
    "\n",
    "plt.subplot(1,2,1)\n",
    "plt.boxplot(len_result)\n",
    "plt.subplot(1,2,2)\n",
    "plt.hist(len_result, bins=50)\n",
    "plt.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "줄거리 최대 길이 :  1880\n",
      "줄거리 평균 길이 :  183.68142857142857\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAcaklEQVR4nO3df5CcVb3n8feHOMgaw00igcoP2IAEa0J2TXQKKBitRBEil7rBresucynNymxFs2RKd6UqkflDuLtDEfYqtSI/anCyhFtkEK6i1BXU3OzcS00p6gQTyGTkMiDKmFQSCZgILPn13T/6dOjMz56Znumefj6vqqf6eU6fp/s76cl3Tp/nPOcoIjAzs2w4rdwBmJnZ5HHSNzPLECd9M7MMcdI3M8sQJ30zswx5T7kDGMlZZ50VCxcuLHcYVqW2b9/+x4iYM9nv699rm0jD/V5XfNJfuHAhXV1d5Q7DqpSk35Xjff17bRNpuN9rd++YmWWIk76ZWYY46ZuZZYiTvplZhjjpm5lliJN+lWhvb2fJkiVMmzaNJUuW0N7eXu6QzKwCVfyQTRtZe3s7zc3NtLW1UV9fT2dnJ42NjQA0NDSUOTozqyRu6VeBlpYW2traWLFiBTU1NaxYsYK2tjZaWlrKHZqZVRgn/SrQ09NDfX39KWX19fX09PSUKSIzq1RO+lWgtraWzs7OU8o6Ozupra0tU0RTm6QzJP1S0k5J3ZJuS+XnS/qFpBclfVfS6an8vem4Nz2/cDzvv3DDj05uZqXmpF8FmpubaWxspKOjg6NHj9LR0UFjYyPNzc3lDm2qegf4RER8GFgKrJR0GbARuCsiFgGvA42pfiPwekRcCNyV6plVJF/IrQL5i7VNTU309PRQW1tLS0uLL+KOUeTWEP1zOqxJWwCfAP4mlW8GbgXuA1alfYB/AL4tSeG1SK0COelXiYaGBif5EpI0DdgOXAjcA7wEvBERx1KVPmB+2p8PvAoQEcck/Qn4APDHfq+5BlgDcN555030j2A2KHfvmA0iIo5HxFJgAXAJMNgFknxLXsM8V/iarRFRFxF1c+ZM+mzOZoCTvtmwIuIN4J+By4CZkvLfjhcAe9J+H3AuQHr+L4CDkxupWXGc9M36kTRH0sy0/2+AK4EeoAP461RtNfDDtP9EOiY9/3/dn2+Vyn36ZgPNBTanfv3TgEcj4h8l7QYekfQ/gV8Dbal+G/D3knrJtfCvL0fQZsVw0jfrJyKeA5YNUv4yuf79/uX/D/jsJIRmNm7u3jEzy5ARk76kTZL2S9pVUPZdSTvS9oqkHal8oaS3C567v+Ccj0p6Pt21+C1Jg414MDOzCVRM986DwLeBh/IFEfGf8vuSvgH8qaD+S2moW3/3kRuj/AzwJLASeGr0IZuZ2ViN2NKPiKcZYvhZaq3/R2DYydslzQXOjIifp1ENDwHXjT5cMzMbj/H26X8M2BcRLxaUnS/p15L+RdLHUtl8cmOZ8wrvZhxA0hpJXZK6Dhw4MM4Qs8GLqJhZMcY7eqeBU1v5e4HzIuI1SR8FfiDpYoq8Y/HkExGtQCtAXV2dxzuPwIuomFmxxtzST3ce/gfgu/myiHgnIl5L+9vJzVdyEbmW/YKC0wvvZrRx8iIqZlas8XTvXAn8JiJOdtukOxmnpf0LgEXAyxGxFzgs6bJ0HeDzvHs3o42TF1Exs2IVM2SzHfg58CFJfZLyc4hfz8ALuB8HnpO0k9wUs1+KiPxF4LXAd4Bect8APHKnRLyIipkVa8Q+/YgYtFM4Iv7zIGXfA743RP0uYMko47Mi5BdR6d+n7+4dM+vP0zBUAS+iYmbFctKvEl5ExcyK4bl3zMwyxEnfzCxDnPTNzDLESd/MLEOc9M3MMsRJ38wsQ5z0zcwyxOP0zSrYwg0/Orn/yh1/WcZIrFq4pW9mliFO+mZmGeKkb2aWIU76ZmYZ4qRvZpYhTvpm/Ug6V1KHpB5J3ZK+nMpvlfQHSTvSdk3BOV+T1CvpBUlXly96s+F5yKbZQMeAr0bEs5JmANslbU3P3RURf1dYWdJicivJXQzMA/5J0kURcXxSozYrQjHLJW6StF/SroKyUbd4JK1MZb2SNpT+RzErjYjYGxHPpv3DQA8wf5hTVgGPRMQ7EfFbckuCXjLxkZqNXjHdOw8CKwcpvysilqbtSRjQ4lkJ3CtpWlos/R7g08BioCHVNatokhYCy4BfpKJ1kp5LjaFZqWw+8GrBaX0M8kdC0hpJXZK6Dhw4MIFRmw1txKQfEU8DB0eqlwzV4rkE6I2IlyPiCPBIqmtWsSS9n9yaz1+JiEPAfcAHgaXAXuAb+aqDnB4DCiJaI6IuIurmzJkzQVGbDW88F3JH0+IpqiWU5xaRlZukGnIJ/+GI+D5AROyLiOMRcQJ4gHe7cPqAcwtOXwDsmcx4zYo11qQ/2hZPUS2hk0+4RWRlJElAG9ATEd8sKJ9bUO0zQP461xPA9ZLeK+l8YBHwy8mK12w0xjR6JyL25fclPQD8YzocrsXjlpBNFVcAnwOel7Qjld1C7lrUUnINlleALwJERLekR4Hd5Eb+3OSRO1apxpT0Jc2NiL3psH+LZ4ukb5IbupZv8QhYlFpBfyB3sfdvxhO42USJiE4G/3b65DDntAAtExaUWYmMmPQltQPLgbMk9QFfB5aPtsUjaR3wE2AasCkiukv+05iZ2bBGTPoR0TBIcdsw9Qdt8aRhnUO2lMzMbOJ5GgYzswxx0jczyxAnfTOzDHHSNzPLECd9M7MMcdI3M8sQJ30zswxx0jczyxAn/SrR3t7OkiVLmDZtGkuWLKG9vb3cIZlZBfJyiVWgvb2d5uZm2traqK+vp7Ozk8bGRgAaGga7odrMssot/SrQ0tJCW1sbK1asoKamhhUrVtDW1kZLi+f/MrNTOelXgZ6eHurr608pq6+vp6enp0wRmVmlctKvArW1tXR2dp5S1tnZSW1tbZkiMrNK5aRfBZqbm2lsbKSjo4OjR4/S0dFBY2Mjzc3N5Q7NzCqML+RWgfzF2qamJnp6eqitraWlpcUXcc1sACf9KtHQ0OAkb2YjcveOmVmGOOmbmWXIiElf0iZJ+yXtKij7X5J+I+k5SY9LmpnKF0p6W9KOtN1fcM5HJT0vqVfStyQNtvC0mZlNoGJa+g8CK/uVbQWWRMS/B/4V+FrBcy9FxNK0famg/D5gDbAobf1f08zMJtiIST8ingYO9iv7aUQcS4fPAAuGew1Jc4EzI+LnERHAQ8B1YwvZzMzGqhR9+jcCTxUcny/p15L+RdLHUtl8oK+gTl8qG5SkNZK6JHUdOHCgBCGamRmMM+lLagaOAQ+nor3AeRGxDPjvwBZJZwKD9d/HUK8bEa0RURcRdXPmzBlPiGZmVmDMSV/SauBa4IbUZUNEvBMRr6X97cBLwEXkWvaFXUALgD1jfW+ziSTpXEkdknokdUv6ciqfLWmrpBfT46xUrjQ4oTcNbvhIeX8Cs6GNKelLWgmsB/4qIt4qKJ8jaVrav4DcBduXI2IvcFjSZWnUzueBH447erOJcQz4akTUApcBN0laDGwAtkXEImBbOgb4NO8OUFhDbtCCWUUqZshmO/Bz4EOS+iQ1At8GZgBb+w3N/DjwnKSdwD8AX4qI/EXgtcB3gF5y3wAKrwOYVYyI2BsRz6b9w0APuWtQq4DNqdpm3h2MsAp4KHKeAWamwQtmFWfEaRgiYrB7+9uGqPs94HtDPNcFLBlVdGZlJmkhsAz4BXBO+tZKROyVdHaqNh94teC0/ECFvf1eaw25bwKcd955Exq32VB8R67ZECS9n1wj5isRcWi4qoOUDRio4AEKVgmc9M0GIamGXMJ/OCK+n4r35btt0uP+VN4HnFtwugcqWMVy0q8SXhi9dNJggzagJyK+WfDUE8DqtL+adwcjPAF8Po3iuQz4U74byKzSeGrlKuCF0UvuCuBzwPOSdqSyW4A7gEfTYIbfA59Nzz0JXENukMJbwBcmN1yz4jnpV4HChdGBkwujNzU1OemPQUR0Mng/PcAnB6kfwE0TGpRZibh7pwr09PTQ19d3SvdOX1+fF0Y3swHc0q8C8+bNY/369Tz88MMnu3duuOEG5s2bV+7QzKzCuKVfJdJMGEMem5mBk35V2LNnD3feeSdNTU2cccYZNDU1ceedd7Jnj0cNmtmp3L1TBWpra1mwYAG7dp1c3IyOjg5qa2vLGJWZVSK39KtAc3MzjY2NdHR0cPToUTo6OmhsbKS5ubncoZlZhXFLvwrkh2U2NTXR09NDbW0tLS0tHq5pZgM46VeJhoYGJ3kzG5G7d8zMMsRJv0p47h0zK4a7d6qA594xs2K5pV8FCufeqampOTn3TktLS7lDM7MKU1TSl7RJ0n5JuwrKRr1ItKTVqf6LaWF1K4Genh7q6+tPKauvr/fcO2Y2QLEt/QeBlf3KRrVItKTZwNeBS4FLgK/n/1DY+NTW1nLbbbed0qd/2223+eYsMxugqKQfEU8DB/sVj3aR6KuBrRFxMCJeB7Yy8A+JjcGKFSvYuHEjN954I4cPH+bGG29k48aNJ6daNjPLG0+f/imLRAMjLRI9VPkAktZI6pLUdeDAgXGEmA0dHR2sX7+eTZs2MWPGDDZt2sT69evp6Ogod2hmVmEm4kLuUItEF7V4NHgB6dHq6enh4MGD9Pb2cuLECXp7ezl48KD79M1sgPEk/dEuEu3FoyfIzJkzaW1t5fbbb+fNN9/k9ttvp7W1lZkzZ5Y7NDOrMONJ+qNdJPonwFWSZqULuFelMhunQ4cOceaZZ7Js2TJqampYtmwZZ555JocOHSp3aGZWYYq6OUtSO7AcOEtSH7lROKNaJDoiDkr6H8CvUr2/jYj+F4dtDI4dO8a8efP4xCc+cbLs4osvpru7u4xRmVklKnb0TkNEzI2ImohYEBFtEfFaRHwyIhalx4OpbkTETRHxwYj4dxHRVfA6myLiwrT9n4n6obJGEt3d3axdu5Y33niDtWvX0t3djTTU2t5mllW+I7cKRASSuPDCC6mpqeHCCy9EkpdMNLMBnPSrxPLly7n55puZPn06N998M8uXLy93SGZWgZz0q0RXVxfbtm3jyJEjbNu2ja6urpFPMrPMcdKvAtOnT+fw4cM89thjvPXWWzz22GMcPnyY6dOnlzs0M6swTvpV4O233+bKK6/k/vvvZ+bMmdx///1ceeWVvP322+UObUoaYoLBWyX9QdKOtF1T8NzX0gSDL0i6ujxRmxXHSb8K1NbWcumll7J48WJOO+00Fi9ezKWXXuoJ18buQQafF+quiFiaticBJC0GrgcuTufcK2napEVqNkpO+lXAE66V1hATDA5lFfBIRLwTEb8ld3/KJRMWnNk4OelXgY6ODq699lpuueUWpk+fzi233MK1117rCddKb11aI2JTwbTgnkjQphQn/Sqwe/dudu7cyVNPPcWRI0d46qmn2LlzJ7t37y53aNXkPuCDwFJgL/CNVO6JBG1KcdKvAqeffjrr1q07ZbnEdevWcfrpp5c7tKoREfsi4nhEnAAe4N0uHE8kaFOKk34VOHLkCHfffTcdHR0cPXqUjo4O7r77bo4cOVLu0KpGfkbZ5DNAfmTPE8D1kt4r6XxyK8b9crLjMytWUROuWWVbvHgx1113HU1NTfT09FBbW8sNN9zAD37wg3KHNiUNMcHgcklLyXXdvAJ8ESAiuiU9CuwGjgE3RcTxcsRtVgwn/SrQ3NxMc3MzbW1t1NfX09nZSWNjIy0tLeUObUqKiIZBituGqd8C+B/bpgQn/SrQ0JDLUYUt/ZaWlpPlZmZ5TvpVoqGhwUnezEbkC7lmZhnipF8lmpqaOOOMM5DEGWecQVNTU7lDMrMKNOakL+lDBZNP7ZB0SNJXPDHV5GtqauKee+7h+PHcoJHjx49zzz33OPGb2QBjTvoR8UJ+8ingo+TWw308Pe2JqSbRvffeiyQ2btzIm2++ycaNG5HEvffeW+7QzKzClKp755PASxHxu2HqeGKqCXLixAkuuOCCU1bOuuCCCzhx4kS5Q7MSWrjhRyzc8KNyh2FTXKmS/vVAe8GxJ6aaZL29vScXQpdEb29vmSMys0o07qQv6XTgr4DHUpEnpiqT973vfac8mpn1V4qW/qeBZyNiH3hiqnL685//fMqjmVl/pUj6DRR07XhiKjOzyjWupC/pfcCngO8XFN8p6XlJzwErgP8GuYmpgPzEVD/GE1OV3OWXX86ePXu4/PLLyx2KmVWocU3DEBFvAR/oV/a5Yep7YqoJ9LOf/Yx58+aVOwwzq2C+I9fMLEOc9M3MMsRJ38wsQ5z0zcwyxEm/iqxdu5Y33niDtWvXljsUM6tQXkSlirS2tnLfffcxbZrnsTOzwbmlb2aWIW7pT1H5ydUKFc6n379exKDTHJlZxrilP0VFxMlty5YtzJgxg5qaGgBqamqYMWMGW7ZsOVnHzAzc0q8K+QXRW1pa6O7u5qKLLqK5udkLpVepwjn1X7njL8sYiU1FTvpVoqGhgYaGBiSxa9eukU8ws0xy946ZWYY46Zv1k1Z82y9pV0HZbElbJb2YHmelckn6lqTetFrcR8oXudnInPTNBnoQWNmvbAOwLSIWAdvSMeQWEVqUtjXkVo4zq1hO+mb9RMTTwMF+xauAzWl/M3BdQflDkfMMMLPfQkJmFcVJ36w450TEXoD0eHYqnw+8WlCvL5UNIGmNpC5JXQcOHJjQYM2G4qRvNj4D75KDQW+MiIjWiKiLiLo5c+ZMcFhmg3PSNyvOvny3TXrcn8r7gHML6i0A9kxybGZFG3fSl/RKWhN3h6SuVOaRDlZtngBWp/3VwA8Lyj+ffrcvA/6U7wYyq0SlaumviIilEVGXjj3SwaYsSe3Az4EPSeqT1AjcAXxK0ovAp9IxwJPAy0Av8ADwX8sQslnRJuqO3FXA8rS/GfhnYD0FIx2AZyTNlDTXLSOrJBEx1PwVnxykbgA3TWxEZqVTipZ+AD+VtF3SmlQ2rpEOHuVgZjYxStHSvyIi9kg6G9gq6TfD1C1qpENEtAKtAHV1dZ4i0sysRMbd0o+IPelxP/A4cAke6WBmVpHGlfQlTZc0I78PXAXswiMdzMwq0ni7d84BHk+rM70H2BIRP5b0K+DRNOrh98BnU/0ngWvIjXR4C/jCON/fzMxGYVxJPyJeBj48SPlreKSDmVnF8R25ZmYZ4qRvZpYhTvpmZhniNXLNpjAvkm6j5aRvVsX8R8H6c/eOmVmGuKVvViXcqrdiuKVvZpYhTvpmZhnipG9mliFO+mZmGeKkb2aWIR69Y5YRHt1j4Ja+mVmmuKVvVoUKW/VmhdzSNzPLELf0zTLOff3ZMuaWvqRzJXVI6pHULenLqfxWSX+QtCNt1xSc8zVJvZJekHR1KX6Aajd79mwkFb0BRdedPXt2mX86M5ts42npHwO+GhHPpsXRt0vamp67KyL+rrCypMXA9cDFwDzgnyRdFBHHxxFD1Xv99dfJrTJZevk/EmaWHWNu6UfE3oh4Nu0fBnqA+cOcsgp4JCLeiYjfklsc/ZKxvr9ZOUh6RdLz6VtsVyqbLWmrpBfT46xyx2k2lJJcyJW0EFgG/CIVrZP0nKRNBf8B5gOvFpzWxxB/JCStkdQlqevAgQOlCNGslFZExNKIqEvHG4BtEbEI2JaOzSrSuJO+pPcD3wO+EhGHgPuADwJLgb3AN/JVBzl90H6LiGiNiLqIqJszZ854QzSbaKuAzWl/M3BdGWMxG9a4kr6kGnIJ/+GI+D5AROyLiOMRcQJ4gHe7cPqAcwtOXwDsGc/7m5VBAD+VtF3SmlR2TkTshVy3J3D2YCf6G6xVgjFfyFXuKmAb0BMR3ywon5v/DwB8BtiV9p8Atkj6JrkLuYuAX471/c3K5IqI2CPpbGCrpN8Ue2JEtAKtAHV1dRNzdb5Ivnkru8YzeucK4HPA85J2pLJbgAZJS8m1iF4BvggQEd2SHgV2kxv5c5NH7thUExF70uN+SY+T+ya7L9/YkTQX2F/WIM2GMeakHxGdDN5P/+Qw57QALWN9T7NykjQdOC0iDqf9q4C/JfctdjVwR3r8YfmiNBue78g1K945wOPp/ob3AFsi4seSfgU8KqkR+D3w2TLGaDYsJ32zIkXEy8CHByl/Dfjk5EdkNnqecM3MLEOc9M3MMsRJ38wsQ5z0zcwyxEnfzCxDnPTNzDLEQzYrXHz9TLj1Lybutc0sU5z0K5xuOzShi6jErRPy0mZWody9Y2aWIW7pm9lJg82+6cXSq4tb+mZmGeKkb2aWIU76ZmYZ4j79KSBN5Vtys2bNGrmSmVUVJ/0KN9rhmpImbIinmU19k969I2mlpBck9UraMNnvb2aWZZPa0pc0DbgH+BTQB/xK0hMRsXsy4zCz4hUO4/Twzalvslv6lwC9EfFyRBwBHgFWTXIMZmaZNdlJfz7wasFxXyo7haQ1krokdR04cGDSgptKJA26DfWcmRlMftIfLPsMuOoYEa0RURcRdXPmzJmEsKaeiBjVZmYGk5/0+4BzC44XAHsmOQYzK4GFG3406LQNVtkmO+n/Clgk6XxJpwPXA09McgxmZpk1qaN3IuKYpHXAT4BpwKaI6J7MGMzMsmzSb86KiCeBJyf7fc1sYgzVxePhnZXJd+Sa2YTwNM2VyUnfrEQkrQT+N7muy+9ExB1lDqnkxnvhdrQ3euXr+49F6Tjpm5WA7zafWL4ruHSc9M1K4+Td5gCS8nebO+kXYTTfIEa6hlCKawyT/UdmNN9oxvvtR5V+446kA8Dvyh3HFHIW8MdyBzGF/NuIGPcdgJL+GlgZEf8lHX8OuDQi1hXUWQOsSYcfAl4Y4uUq8TN0TCOrpHiG/L2u+JZ+Kf5DZomkroioK3ccGTTi3eYR0Qq0jvhCFfgZOqaRVVo8Q/HKWWal4bvNbUpw0jcrDd9tblNCxXfv2KiN2H1gpVfiu80r8TN0TCOrtHgGVfEXcs3MrHTcvWNmliFO+mZmGeKkXyUkbZK0X9KucsdiYyNppaQXJPVK2jDJ7/2KpOcl7ZDUlcpmS9oq6cX0OCuVS9K3UpzPSfpIiWIY8Ds8lhgkrU71X5S0egJiulXSH9K/1Q5J1xQ897UU0wuSri4oL9tnO8BoV2DyVpkb8HHgI8CucsfibUyf3zTgJeAC4HRgJ7B4Et//FeCsfmV3AhvS/gZgY9q/BniK3L0JlwG/KFEMA36HRxsDMBt4OT3OSvuzShzTrcDNg9RdnD639wLnp89zWrk/2/6bW/pVIiKeBg6WOw4bs5PTOETEESA/jUM5rQI2p/3NwHUF5Q9FzjPATElzx/tmQ/wOjzaGq4GtEXEwIl4HtgIrSxzTUFYBj0TEOxHxW6CX3OdaUZ+tk75ZZZgPvFpw3JfKJksAP5W0PU0XAXBOROwFSI9np/LJjHW0MUxWbOtSt9KmfJdTBcRUFCd9s8ow4jQOE+yKiPgI8GngJkkfH6ZuuWMdLobJiO0+4IPAUmAv8I0KiKloTvpmlaGs0zhExJ70uB94nFyXxL58t0163F+GWEcbw4THFhH7IuJ4RJwAHiD3b1XWmEbDSd+sMpRtGgdJ0yXNyO8DVwG70vvnR7+sBn6Y9p8APp9G0FwG/CnfBTMBRhvDT4CrJM1K3S5XpbKS6Xf94jPk/q3yMV0v6b2SzgcWAb+k0qboKNcVZG+l3YB2cl81j5JrWTSWOyZvo/4MrwH+ldxIj+ZJfN8LyI0o2Ql0598b+ACwDXgxPc5O5SK3YMxLwPNAXYniGPA7PJYYgBvJXUTtBb4wATH9fXrP58gl77kF9ZtTTC8Any73ZzvY5mkYzMwyxN07ZmYZ4qRvZpYhTvpmZhnipG9mliFO+mZmGeKkb2aWIU76ZmYZ8v8BvNZYS6tJWloAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "DH_X_test = []\n",
    "DH_Y_test = []  \n",
    "\n",
    "for i in range(DHtrain_data_size):\n",
    "    DH_X_test.append(DR_encoded[DHtrain_data_size+i])\n",
    "    DH_Y_test.append([1,0])\n",
    "    DH_X_test.append(HS_encoded[DHtrain_data_size+i])\n",
    "    DH_Y_test.append([0,1])\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "len_result = [len(s) for s in DH_X_test]\n",
    "print(\"줄거리 최대 길이 : \",max(len_result))\n",
    "print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
    "\n",
    "plt.subplot(1,2,1)\n",
    "plt.boxplot(len_result)\n",
    "plt.subplot(1,2,2)\n",
    "plt.hist(len_result, bins=50)\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1400 samples, validate on 1400 samples\n",
      "Epoch 1/3\n",
      "  64/1400 [>.............................] - ETA: 31sWARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: \n",
      "WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n"
     ]
    },
    {
     "ename": "InvalidArgumentError",
     "evalue": " indices[46,167] = 15001 is not in [0, 5002)\n\t [[node sequential_9/embedding_9/embedding_lookup (defined at <ipython-input-49-edff8fe77726>:22) ]] [Op:__inference_distributed_function_17701]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_9/embedding_9/embedding_lookup:\n sequential_9/embedding_9/embedding_lookup/16556 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mInvalidArgumentError\u001b[0m                      Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-49-edff8fe77726>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     20\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     21\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'binary_crossentropy'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'adam'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'acc'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mDH_X_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDH_Y_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mDH_X_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDH_Y_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m64\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmc\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m    817\u001b[0m         \u001b[0mmax_queue_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    818\u001b[0m         \u001b[0mworkers\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m         use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[0;32m    820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    821\u001b[0m   def evaluate(self,\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m    340\u001b[0m                 \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    341\u001b[0m                 \u001b[0mtraining_context\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtraining_context\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 342\u001b[1;33m                 total_epochs=epochs)\n\u001b[0m\u001b[0;32m    343\u001b[0m             \u001b[0mcbks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake_logs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraining_result\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    344\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mrun_one_epoch\u001b[1;34m(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)\u001b[0m\n\u001b[0;32m    126\u001b[0m         step=step, mode=mode, size=current_batch_size) as batch_logs:\n\u001b[0;32m    127\u001b[0m       \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 128\u001b[1;33m         \u001b[0mbatch_outs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    129\u001b[0m       \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mStopIteration\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOutOfRangeError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    130\u001b[0m         \u001b[1;31m# TODO(kaftan): File bug about tf function and errors.OutOfRangeError?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2_utils.py\u001b[0m in \u001b[0;36mexecution_function\u001b[1;34m(input_fn)\u001b[0m\n\u001b[0;32m     96\u001b[0m     \u001b[1;31m# `numpy` translates Tensors to values in Eager mode.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     97\u001b[0m     return nest.map_structure(_non_none_constant_value,\n\u001b[1;32m---> 98\u001b[1;33m                               distributed_function(input_fn))\n\u001b[0m\u001b[0;32m     99\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    100\u001b[0m   \u001b[1;32mreturn\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m    566\u001b[0m         \u001b[0mxla_context\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    567\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 568\u001b[1;33m       \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    569\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    570\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m    630\u001b[0m         \u001b[1;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    631\u001b[0m         \u001b[1;31m# stateless function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 632\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    633\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    634\u001b[0m       \u001b[0mcanon_args\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcanon_kwds\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   2361\u001b[0m     \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2362\u001b[0m       \u001b[0mgraph_function\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2363\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2364\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2365\u001b[0m   \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[1;34m(self, args, kwargs)\u001b[0m\n\u001b[0;32m   1609\u001b[0m          if isinstance(t, (ops.Tensor,\n\u001b[0;32m   1610\u001b[0m                            resource_variable_ops.BaseResourceVariable))),\n\u001b[1;32m-> 1611\u001b[1;33m         self.captured_inputs)\n\u001b[0m\u001b[0;32m   1612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1613\u001b[0m   \u001b[1;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m   1690\u001b[0m       \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1691\u001b[0m       return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1692\u001b[1;33m           ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m   1693\u001b[0m     forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m   1694\u001b[0m         \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m    543\u001b[0m               \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    544\u001b[0m               \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"executor_type\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexecutor_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"config_proto\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 545\u001b[1;33m               ctx=ctx)\n\u001b[0m\u001b[0;32m    546\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    547\u001b[0m           outputs = execute.execute_with_cancellation(\n",
      "\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m     65\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     66\u001b[0m       \u001b[0mmessage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 67\u001b[1;33m     \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_status_to_exception\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     68\u001b[0m   \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     69\u001b[0m     keras_symbolic_tensors = [\n",
      "\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\six.py\u001b[0m in \u001b[0;36mraise_from\u001b[1;34m(value, from_value)\u001b[0m\n",
      "\u001b[1;31mInvalidArgumentError\u001b[0m:  indices[46,167] = 15001 is not in [0, 5002)\n\t [[node sequential_9/embedding_9/embedding_lookup (defined at <ipython-input-49-edff8fe77726>:22) ]] [Op:__inference_distributed_function_17701]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_9/embedding_9/embedding_lookup:\n sequential_9/embedding_9/embedding_lookup/16556 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n"
     ]
    }
   ],
   "source": [
    "max_len = 200\n",
    "\n",
    "DH_X_train = pad_sequences(DH_X_train, maxlen=max_len)\n",
    "DH_X_test = pad_sequences(DH_X_test, maxlen=max_len)\n",
    "\n",
    "\n",
    "model = Sequential()\n",
    "model.add(Embedding(5002, 120))\n",
    "model.add(LSTM(120))\n",
    "model.add(Dense(2, activation='sigmoid'))\n",
    "\n",
    "es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)\n",
    "mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)\n",
    "\n",
    "DH_X_train = np.array(DH_X_train)\n",
    "DH_X_test = np.array(DH_X_test)\n",
    "DH_Y_train = np.array(DH_Y_train)\n",
    "DH_Y_test = np.array(DH_Y_test)\n",
    "\n",
    "\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])\n",
    "model.fit(DH_X_train, DH_Y_train, validation_data=(DH_X_test, DH_Y_test), epochs=3, batch_size=64, callbacks=[es, mc])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}