Showing 1 changed file with 77 additions and 0 deletions
data/jointmarketdata.py
0 → 100644
# -*- coding: utf-8 -*-

from bs4 import BeautifulSoup
from urllib.request import urlopen
import pandas as pd
import requests
import os
import datetime

# Base endpoint of the public open-API (Grid_20160624000000000349_1 dataset).
# Page range and the AUCNG_DE (auction date) query parameter are appended per request.
url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160624000000000349_1/'
def makecsvfile(day):
    """Download every auction record for one day and write it to a CSV file.

    Pages through the open-API 1000 rows at a time until either an error
    result code is returned or the start row exceeds the reported total.

    Parameters
    ----------
    day : str or int
        Auction date in YYYYMMDD form (passed as the AUCNG_DE parameter).

    Side effects: writes ``./jointmarketdata/<day>data.csv`` (euc-kr encoded)
    and prints progress to stdout.
    """
    # Output file name, e.g. '20200501data.csv'.
    output_file = str(day) + 'data.csv'

    # Column names double as the XML tag names inside each <row> element.
    columns = ['row_num', 'aucng_de', 'cpr_nm', 'cpr_cd', 'cpr_type_nm',
               'cpr_type_cd', 'prdlst_nm', 'prdlst_cd', 'spcies_nm',
               'spcies_cd', 'delngbundle_qy', 'stndrd', 'stndrd_cd',
               'grad', 'grad_cd', 'sanco', 'sannm', 'mumm_amt',
               'avrg_amt', 'mxmm_amt', 'delng_qy', 'auc_co']
    df = pd.DataFrame(columns=columns)

    i = 0       # next DataFrame row index (also the running record count)
    number = 0  # zero-based page number; each page holds up to 1000 rows

    while True:
        # Build the paged URL: <base>/<start>/<end>?AUCNG_DE=<day>
        myurl = (url + str(number * 1000 + 1) + '/'
                 + str((number + 1) * 1000) + '?AUCNG_DE=' + str(day))
        data = urlopen(myurl).read()
        soup = BeautifulSoup(data, 'html.parser')

        # Stop on any result code other than the API's success code.
        result = soup.find('result')
        result_code = result.code.string if result is not None else None
        if result_code != 'INFO-000':
            print(result_code)
            break

        # Read pagination counters from the tag text directly instead of
        # slicing the stringified tag (the old [10:-11] trick broke if the
        # tag name length ever changed).
        start_num = int(soup.find('startrow').string)
        total_num = int(soup.find('totalcnt').string)
        print(start_num)
        if total_num < start_num:
            print('find all')
            break

        # Append every <row> of this page; tag names match the columns.
        for item in soup.find_all('row'):
            df.loc[i] = [item.find(col).string for col in columns]
            i += 1

        # Advance to the next block of 1000 records.
        number += 1

    # Progress report: date and number of records collected.
    print(str(day), ' : ', str(i))

    # Ensure the output directory exists, then export as CSV
    # (euc-kr so Korean text opens correctly in Excel).
    os.makedirs('./jointmarketdata', exist_ok=True)
    df.to_csv(os.path.join('./jointmarketdata', output_file),
              encoding='euc-kr', index=False)
| 59 | + | ||
def checkdatanum(day):
    """Return the total number of auction records available for one day.

    Issues a minimal 1-row request and reads the <totalcnt> element.

    Parameters
    ----------
    day : str or int
        Auction date in YYYYMMDD form (passed as the AUCNG_DE parameter).

    Returns
    -------
    int
        The record count reported by the API (also printed to stdout).
    """
    myurl = url + '1/1?AUCNG_DE=' + str(day)

    resp = requests.get(myurl)
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Parse the tag's text directly rather than slicing the stringified
    # tag with the fragile [10:-11] offsets.
    product_num = int(soup.find('totalcnt').string)
    print(day, ':', product_num)
    return product_num
| 70 | + | ||
# Crawl each day from 2020-05-01 through 2020-05-05 (inclusive), writing
# one CSV per day.  (The unused `i = 0` counter was removed, and the loop
# no longer needs the "start one day early, pre-increment" trick.)
first_day = datetime.date(2020, 5, 1)
last_day = datetime.date(2020, 5, 5)

day = first_day
while day <= last_day:
    makecsvfile(day.strftime('%Y%m%d'))
    day += datetime.timedelta(days=1)
-
Please register or login to post a comment