양선아

adding joint market data

1 +# -*- coding: utf-8 -*-
2 +
3 +from bs4 import BeautifulSoup
4 +from urllib.request import urlopen
5 +import pandas as pd
6 +import requests
7 +import os
8 +import datetime
9 +
# Base endpoint of the open-API grid queried by this script. Callers append
# '<start>/<end>?AUCNG_DE=<yyyymmdd>' to select a row range for one day.
# NOTE(review): the long hex path segment looks like an access key committed
# in source -- confirm whether it should be loaded from configuration instead.
url = (
    'http://211.237.50.150:7080/openapi/'
    '5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf'
    '/xml/Grid_20160624000000000349_1/'
)
11 +
def _tag_int(soup, tag_name):
    """Return the integer text of the first ``tag_name`` element in ``soup``.

    Raises:
        ValueError: if the element is missing or its text is not an integer.
    """
    tag = soup.find(tag_name)
    if tag is None:
        raise ValueError('<%s> not found in API response' % tag_name)
    return int(tag.get_text())


def _field_text(row, field_name):
    """Return the text of ``field_name`` inside ``row`` as a plain ``str``.

    Returns None when the element has no single text child (e.g. it is
    empty). Converting to ``str`` keeps parser objects out of the DataFrame.
    """
    text = row.find(field_name).string
    return None if text is None else str(text)


def makecsvfile(day):
    """Download every record for one auction day and save it as a CSV file.

    Pages through the API at ``url`` 1000 rows at a time, filtering with
    ``AUCNG_DE=<day>``. Paging stops when the response's result code is not
    'INFO-000' (the code is printed) or when the reported total count is
    smaller than the requested start row. Whatever rows were collected are
    then written to ``./jointmarketdata/<day>data.csv`` (EUC-KR, no index).

    Args:
        day: The auction date; it is passed through ``str()`` for both the
            query parameter and the file name (the caller uses 'YYYYMMDD').

    Returns:
        None. The result is the CSV file on disk.
    """
    columns = [
        'row_num', 'aucng_de', 'cpr_nm', 'cpr_cd', 'cpr_type_nm',
        'cpr_type_cd', 'prdlst_nm', 'prdlst_cd', 'spcies_nm', 'spcies_cd',
        'delngbundle_qy', 'stndrd', 'stndrd_cd', 'grad', 'grad_cd', 'sanco',
        'sannm', 'mumm_amt', 'avrg_amt', 'mxmm_amt', 'delng_qy', 'auc_co',
    ]
    page_size = 1000
    output_dir = './jointmarketdata'
    output_file = str(day) + 'data.csv'

    # Collect rows in a list and build the DataFrame once at the end;
    # growing a DataFrame one ``.loc`` row at a time is much slower.
    rows = []
    number = 0  # zero-based page index

    while True:
        # Build the page URL and fetch it, closing the connection afterwards.
        first_row = number * page_size + 1
        last_row = (number + 1) * page_size
        myurl = url + str(first_row) + '/' + str(last_row) + '?AUCNG_DE=' + str(day)
        with urlopen(myurl) as response:
            data = response.read()
        soup = BeautifulSoup(data, 'html.parser')

        # Stop on any result code other than the success code.
        result_code = soup.find('result').code.string
        if result_code != 'INFO-000':
            print(result_code)
            break

        # Stop once the requested start row is past the reported total.
        # Read the element text instead of slicing the tag's markup.
        start_num = _tag_int(soup, 'startrow')
        total_num = _tag_int(soup, 'totalcnt')
        print(start_num)
        if total_num < start_num:
            print('find all')
            break

        # The column names double as the element names inside each <row>.
        for item in soup.find_all('row'):
            rows.append([_field_text(item, column) for column in columns])

        # Move on to the next 1000 rows.
        number += 1

    # Progress output: how many rows were collected for this day.
    print(str(day), ' : ', str(len(rows)))

    # Export to CSV, creating the target directory on first use.
    df = pd.DataFrame(rows, columns=columns)
    os.makedirs(output_dir, exist_ok=True)
    df.to_csv(os.path.join(output_dir, output_file), encoding='euc-kr', index=False)
59 +
def checkdatanum(day):
    """Return the total number of records the API reports for ``day``.

    Requests only the row range 1/1 with ``AUCNG_DE=<day>`` and reads the
    ``totalcnt`` element of the response. The count is also printed.

    Args:
        day: The auction date; it is passed through ``str()`` for the query.

    Returns:
        The total count as an ``int``.

    Raises:
        ValueError: if the response has no ``totalcnt`` element or its text
            is not an integer.
    """
    myurl = url + '1/1?AUCNG_DE=' + str(day)

    response = requests.get(myurl)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Read the element text rather than slicing the tag's markup by fixed
    # offsets, which silently depends on the tag name's length.
    total_tag = soup.find('totalcnt')
    if total_tag is None:
        raise ValueError('<totalcnt> not found in API response for ' + str(day))
    product_num = int(total_tag.get_text())

    print(day, ':', product_num)
    return product_num
70 +
# Export one CSV per day for 2020-05-01 through 2020-05-05 inclusive.
# first_day starts one day early because it is advanced before each download.
i = 0
last_day = datetime.date(2020, 5, 5)
first_day = datetime.date(2020, 5, 1) - datetime.timedelta(days=1)

for _ in range((last_day - first_day).days):
    first_day = first_day + datetime.timedelta(days=1)
    makecsvfile(first_day.strftime('%Y%m%d'))