data collection2.ipynb 7.36 KB
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# module\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the test and validation CSV files into DataFrames.\n",
    "test_csv_path = '/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/recent_test/recent_data.csv'\n",
    "validation_csv_path = '/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/validation.csv'\n",
    "\n",
    "test = pd.read_csv(test_csv_path)\n",
    "validation = pd.read_csv(validation_csv_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the 'Unnamed: 0' column from both frames\n",
    "# (presumably an index column saved with the CSV -- confirm against the files).\n",
    "# drop(..., errors='ignore') keeps this cell re-runnable: `del` raises KeyError\n",
    "# on a second run because the column is already gone.\n",
    "test = test.drop(columns=['Unnamed: 0'], errors='ignore')\n",
    "validation = validation.drop(columns=['Unnamed: 0'], errors='ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 지수이동평균 - 종가"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "##----test_data----##\n",
    "# Exponentially weighted mean of test['close'], one column per span.\n",
    "# Column labels are the span values as strings ('5', '10', ...), in this order.\n",
    "ma_spans = (5, 10, 20, 60, 120)\n",
    "test_EMA = pd.DataFrame({str(span): test['close'].ewm(span=span).mean() for span in ma_spans})\n",
    "\n",
    "# The index is written too, so the CSV gains a leading unnamed column when read back.\n",
    "test_EMA.to_csv(\"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/test_EMA.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "##----val_data----##\n",
    "# Exponentially weighted mean of validation['close'], one column per span.\n",
    "# Column labels are the span values as strings ('5', '10', ...), in this order.\n",
    "ma_spans = (5, 10, 20, 60, 120)\n",
    "val_EMA = pd.DataFrame({str(span): validation['close'].ewm(span=span).mean() for span in ma_spans})\n",
    "\n",
    "# The index is written too, so the CSV gains a leading unnamed column when read back.\n",
    "val_EMA.to_csv(\"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/val_EMA.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 지수이동평균 - 거래량"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "##----test_data----##\n",
    "# Exponentially weighted mean of test['vol'], one column per span.\n",
    "# Column labels are the span values as strings ('5', '10', ...), in this order.\n",
    "ma_spans = (5, 10, 20, 60, 120)\n",
    "test_EVMA = pd.DataFrame({str(span): test['vol'].ewm(span=span).mean() for span in ma_spans})\n",
    "\n",
    "# The index is written too, so the CSV gains a leading unnamed column when read back.\n",
    "test_EVMA.to_csv(\"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/test_EVMA.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "##----val_data----##\n",
    "# Exponentially weighted mean of validation['vol'], one column per span.\n",
    "# Column labels are the span values as strings ('5', '10', ...), in this order.\n",
    "ma_spans = (5, 10, 20, 60, 120)\n",
    "val_EVMA = pd.DataFrame({str(span): validation['vol'].ewm(span=span).mean() for span in ma_spans})\n",
    "\n",
    "# The index is written too, so the CSV gains a leading unnamed column when read back.\n",
    "val_EVMA.to_csv(\"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/val_EVMA.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import MinMaxScaler\n",
    "\n",
    "# Single scaler object shared by the scaling cells below.\n",
    "scaler = MinMaxScaler()\n",
    "\n",
    "# scale_cols is not referenced by any other cell in this notebook.\n",
    "scale_cols = ['close', 'open', 'high', 'low', 'vol']\n",
    "\n",
    "# Span labels used as column names in the moving-average CSVs.\n",
    "scale_cols2 = [str(span) for span in (5, 10, 20, 60, 120)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "##----test_data----##\n",
    "# Reload the saved close-price moving averages and min-max scale the span columns.\n",
    "test_EMA = pd.read_csv('/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/test_EMA.csv')\n",
    "test_EMA_scaled = pd.DataFrame(\n",
    "    scaler.fit_transform(test_EMA[scale_cols2]),\n",
    "    columns=['5', '10', '20', '60', '120'],\n",
    ")\n",
    "\n",
    "##----val_data----##\n",
    "# NOTE(review): fit_transform is called again here, so validation appears to be scaled\n",
    "# with its own min/max rather than the test split's -- confirm this is intended.\n",
    "val_EMA = pd.read_csv('/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/val_EMA.csv')\n",
    "val_EMA_scaled = pd.DataFrame(\n",
    "    scaler.fit_transform(val_EMA[scale_cols2]),\n",
    "    columns=['5', '10', '20', '60', '120'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "##----test_data----##\n",
    "# Reload the saved volume moving averages and min-max scale the span columns.\n",
    "test_EVMA = pd.read_csv('/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/test_EVMA.csv')\n",
    "test_EVMA_scaled = pd.DataFrame(\n",
    "    scaler.fit_transform(test_EVMA[scale_cols2]),\n",
    "    columns=['5', '10', '20', '60', '120'],\n",
    ")\n",
    "\n",
    "##----val_data----##\n",
    "# NOTE(review): fit_transform is called again here, so validation appears to be scaled\n",
    "# with its own min/max rather than the test split's -- confirm this is intended.\n",
    "val_EVMA = pd.read_csv('/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA/Exponential/val_EVMA.csv')\n",
    "val_EVMA_scaled = pd.DataFrame(\n",
    "    scaler.fit_transform(val_EVMA[scale_cols2]),\n",
    "    columns=['5', '10', '20', '60', '120'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write each scaled frame to its CSV file (index included, as to_csv does by default).\n",
    "scaled_outputs = [\n",
    "    (test_EMA_scaled, \"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA_scaled/close/exp/test_EMA_scaled.csv\"),\n",
    "    (val_EMA_scaled, \"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA_scaled/close/exp/val_EMA_scaled.csv\"),\n",
    "    (test_EVMA_scaled, \"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA_scaled/vol/exp/test_EVMA_scaled.csv\"),\n",
    "    (val_EVMA_scaled, \"/Users/yangyoonji/Documents/2020_2학기/캡스톤디자인/data/MA_scaled/vol/exp/val_EVMA_scaled.csv\"),\n",
    "]\n",
    "for scaled_frame, csv_path in scaled_outputs:\n",
    "    scaled_frame.to_csv(csv_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}