Showing 1 changed file with 77 additions and 0 deletions
data/jointmarketdata.py
0 → 100644
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
from urllib.request import urlopen
import pandas as pd
import requests
import os
import datetime

# Base endpoint of the public open-API (Grid_20160624000000000349_1 dataset).
# Page range and the AUCNG_DE (auction date) query parameter are appended per request.
url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160624000000000349_1/'
def makecsvfile(day):
    """Download every auction record for one day and write it to a CSV file.

    Pages through the open-API 1000 rows at a time until either an error
    result code is returned or the start row exceeds the reported total.

    Parameters
    ----------
    day : str or int
        Auction date in YYYYMMDD form (passed as the AUCNG_DE parameter).

    Side effects: writes ``./jointmarketdata/<day>data.csv`` (euc-kr encoded)
    and prints progress to stdout.
    """
    # Output file name, e.g. '20200501data.csv'.
    output_file = str(day) + 'data.csv'

    # Column names double as the XML tag names inside each <row> element.
    columns = ['row_num', 'aucng_de', 'cpr_nm', 'cpr_cd', 'cpr_type_nm',
               'cpr_type_cd', 'prdlst_nm', 'prdlst_cd', 'spcies_nm',
               'spcies_cd', 'delngbundle_qy', 'stndrd', 'stndrd_cd',
               'grad', 'grad_cd', 'sanco', 'sannm', 'mumm_amt',
               'avrg_amt', 'mxmm_amt', 'delng_qy', 'auc_co']
    df = pd.DataFrame(columns=columns)

    i = 0       # next DataFrame row index (also the running record count)
    number = 0  # zero-based page number; each page holds up to 1000 rows

    while True:
        # Build the paged URL: <base>/<start>/<end>?AUCNG_DE=<day>
        myurl = (url + str(number * 1000 + 1) + '/'
                 + str((number + 1) * 1000) + '?AUCNG_DE=' + str(day))
        data = urlopen(myurl).read()
        soup = BeautifulSoup(data, 'html.parser')

        # Stop on any result code other than the API's success code.
        result = soup.find('result')
        result_code = result.code.string if result is not None else None
        if result_code != 'INFO-000':
            print(result_code)
            break

        # Read pagination counters from the tag text directly instead of
        # slicing the stringified tag (the old [10:-11] trick broke if the
        # tag name length ever changed).
        start_num = int(soup.find('startrow').string)
        total_num = int(soup.find('totalcnt').string)
        print(start_num)
        if total_num < start_num:
            print('find all')
            break

        # Append every <row> of this page; tag names match the columns.
        for item in soup.find_all('row'):
            df.loc[i] = [item.find(col).string for col in columns]
            i += 1

        # Advance to the next block of 1000 records.
        number += 1

    # Progress report: date and number of records collected.
    print(str(day), ' : ', str(i))

    # Ensure the output directory exists, then export as CSV
    # (euc-kr so Korean text opens correctly in Excel).
    os.makedirs('./jointmarketdata', exist_ok=True)
    df.to_csv(os.path.join('./jointmarketdata', output_file),
              encoding='euc-kr', index=False)
| 59 | + | ||
def checkdatanum(day):
    """Return the total number of auction records available for one day.

    Issues a minimal 1-row request and reads the <totalcnt> element.

    Parameters
    ----------
    day : str or int
        Auction date in YYYYMMDD form (passed as the AUCNG_DE parameter).

    Returns
    -------
    int
        The record count reported by the API (also printed to stdout).
    """
    myurl = url + '1/1?AUCNG_DE=' + str(day)

    resp = requests.get(myurl)
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Parse the tag's text directly rather than slicing the stringified
    # tag with the fragile [10:-11] offsets.
    product_num = int(soup.find('totalcnt').string)
    print(day, ':', product_num)
    return product_num
| 70 | + | ||
# Crawl each day from 2020-05-01 through 2020-05-05 (inclusive), writing
# one CSV per day.  (The unused `i = 0` counter was removed, and the loop
# no longer needs the "start one day early, pre-increment" trick.)
first_day = datetime.date(2020, 5, 1)
last_day = datetime.date(2020, 5, 5)

day = first_day
while day <= last_day:
    makecsvfile(day.strftime('%Y%m%d'))
    day += datetime.timedelta(days=1)
-
Please register or login to post a comment