recent_test_data-checkpoint.ipynb 10.1 KB
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "recent_data = pd.read_csv('/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/recent_test/recent_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 데이터 역순으로 변경\n",
    "recent_data = recent_data.loc[::-1]\n",
    "recent_data = recent_data.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>close</th>\n",
       "      <th>open</th>\n",
       "      <th>high</th>\n",
       "      <th>low</th>\n",
       "      <th>vol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20201029</td>\n",
       "      <td>309.94</td>\n",
       "      <td>308.99</td>\n",
       "      <td>310.56</td>\n",
       "      <td>306.63</td>\n",
       "      <td>1.3904e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20201030</td>\n",
       "      <td>301.60</td>\n",
       "      <td>308.91</td>\n",
       "      <td>309.08</td>\n",
       "      <td>301.58</td>\n",
       "      <td>1.7891e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20201102</td>\n",
       "      <td>305.95</td>\n",
       "      <td>302.67</td>\n",
       "      <td>306.43</td>\n",
       "      <td>301.37</td>\n",
       "      <td>1.1103e+08</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       date   close    open    high     low         vol\n",
       "0  20201029  309.94  308.99  310.56  306.63  1.3904e+08\n",
       "1  20201030  301.60  308.91  309.08  301.58  1.7891e+08\n",
       "2  20201102  305.95  302.67  306.43  301.37  1.1103e+08"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recent_data.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def vol_char_to_num(data):\n",
    "    for i in tqdm(range(len(data))):\n",
    "        unit = data['vol'][i][-1]\n",
    "        data['vol'][i] = data['vol'][i][:-1]\n",
    "        if unit == 'K':\n",
    "            data['vol'][i] = float(data['vol'][i]) * 1000\n",
    "        elif unit == 'M':\n",
    "            data['vol'][i] = float(data['vol'][i]) * 1000000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  0%|          | 0/7 [00:00<?, ?it/s]/Users/yangyoonji/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  after removing the cwd from sys.path.\n",
      "/Users/yangyoonji/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:8: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n",
      " 57%|█████▋    | 4/7 [00:00<00:00,  4.51it/s]/Users/yangyoonji/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  \n",
      "100%|██████████| 7/7 [00:01<00:00,  4.77it/s]\n"
     ]
    }
   ],
   "source": [
    "vol_char_to_num(recent_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "scaler = MinMaxScaler()\n",
    "scale_cols = ['close','open','high','low','vol']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "recent_data_scaled = scaler.fit_transform(recent_data[scale_cols]) \n",
    "recent_data_scaled = pd.DataFrame(recent_data_scaled)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.406829</td>\n",
       "      <td>0.315212</td>\n",
       "      <td>0.231373</td>\n",
       "      <td>0.273389</td>\n",
       "      <td>0.746880</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.311222</td>\n",
       "      <td>0.148459</td>\n",
       "      <td>0.010915</td>\n",
       "      <td>0.961373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.212195</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.596191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.510732</td>\n",
       "      <td>0.272818</td>\n",
       "      <td>0.331653</td>\n",
       "      <td>0.345114</td>\n",
       "      <td>0.692866</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.596585</td>\n",
       "      <td>0.580549</td>\n",
       "      <td>0.514846</td>\n",
       "      <td>0.519751</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.971220</td>\n",
       "      <td>0.651870</td>\n",
       "      <td>0.844818</td>\n",
       "      <td>0.722453</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.773348</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1         2         3         4\n",
       "0  0.406829  0.315212  0.231373  0.273389  0.746880\n",
       "1  0.000000  0.311222  0.148459  0.010915  0.961373\n",
       "2  0.212195  0.000000  0.000000  0.000000  0.596191\n",
       "3  0.510732  0.272818  0.331653  0.345114  0.692866\n",
       "4  0.596585  0.580549  0.514846  0.519751  0.000000\n",
       "5  0.971220  0.651870  0.844818  0.722453  1.000000\n",
       "6  1.000000  1.000000  1.000000  1.000000  0.773348"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "recent_data_scaled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "recent_data_scaled.to_csv('/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/recent_test/recent_data_scaled.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "checking if any null values are present in train\n",
      " 0    0\n",
      "1    0\n",
      "2    0\n",
      "3    0\n",
      "4    0\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(\"checking if any null values are present in train\\n\", recent_data_scaled.isna().sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}