Showing 1 changed file with 77 additions and 0 deletions.
data/jointmarketdata.py
0 → 100644
1 | +# -*- coding: utf-8 -*- | ||
2 | + | ||
3 | +from bs4 import BeautifulSoup | ||
4 | +from urllib.request import urlopen | ||
5 | +import pandas as pd | ||
6 | +import requests | ||
7 | +import os | ||
8 | +import datetime | ||
9 | + | ||
10 | +url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160624000000000349_1/' | ||
11 | + | ||
def makecsvfile(day):
    """Download every auction record for *day* from the open API and save it as CSV.

    The API serves at most 1000 rows per request, so results are fetched in
    pages of 1000 until the reported total is exhausted.  The CSV is written to
    ./jointmarketdata/<day>data.csv in EUC-KR encoding.

    :param day: date key in YYYYMMDD form (str or anything str()-able)
    """
    # Output file name, e.g. "20200501data.csv".
    output_file = str(day) + 'data.csv'

    columns = ['row_num', 'aucng_de', 'cpr_nm', 'cpr_cd', 'cpr_type_nm',
               'cpr_type_cd', 'prdlst_nm', 'prdlst_cd', 'spcies_nm',
               'spcies_cd', 'delngbundle_qy', 'stndrd', 'stndrd_cd', 'grad',
               'grad_cd', 'sanco', 'sannm', 'mumm_amt', 'avrg_amt', 'mxmm_amt',
               'delng_qy', 'auc_co']

    rows = []    # accumulate rows in a plain list; building df.loc[i] one row
                 # at a time is quadratic in pandas
    page = 0     # current 1000-row page index

    while True:
        # Request rows page*1000+1 .. (page+1)*1000 for the given auction date.
        myurl = (url + str(page * 1000 + 1) + '/' + str((page + 1) * 1000)
                 + '?AUCNG_DE=' + str(day))
        # Close the HTTP response deterministically (original leaked it).
        with urlopen(myurl) as resp:
            data = resp.read()
        soup = BeautifulSoup(data, 'html.parser')

        # Stop on any API error code; 'INFO-000' means success.
        result = soup.find('result')
        result_code = result.code.string if result is not None and result.code is not None else None
        if result_code != 'INFO-000':
            print(result_code)
            break

        # Pagination bookkeeping: read the tag text directly instead of the
        # original brittle str(tag)[10:-11] serialization slicing.
        start_num = int(soup.find('startrow').string)
        total_num = int(soup.find('totalcnt').string)
        print(start_num)
        if total_num < start_num:
            print('find all')
            break

        # One <row> element per auction record.
        for item in soup.find_all('row'):
            rows.append([item.row_num.string, item.aucng_de.string,
                         item.cpr_nm.string, item.cpr_cd.string,
                         item.cpr_type_nm.string, item.cpr_type_cd.string,
                         item.prdlst_nm.string, item.prdlst_cd.string,
                         item.spcies_nm.string, item.spcies_cd.string,
                         item.delngbundle_qy.string, item.stndrd.string,
                         item.stndrd_cd.string, item.grad.string,
                         item.grad_cd.string, item.sanco.string,
                         item.sannm.string, item.mumm_amt.string,
                         item.avrg_amt.string, item.mxmm_amt.string,
                         item.delng_qy.string, item.auc_co.string])

        # Next page of (at most) 1000 records.
        page += 1

    df = pd.DataFrame(rows, columns=columns)

    # Progress report: day and number of rows collected.
    print(str(day), ' : ', str(len(rows)))
    # Ensure the output directory exists (original crashed if it was missing).
    os.makedirs('./jointmarketdata', exist_ok=True)
    df.to_csv(os.path.join('./jointmarketdata', output_file),
              encoding='euc-kr', index=False)
59 | + | ||
def checkdatanum(day):
    """Return the total number of auction records the API reports for *day*.

    Issues a minimal 1-row request and reads the <totalcnt> field.

    :param day: date key in YYYYMMDD form (str or anything str()-able)
    :return: record count as int
    """
    myurl = url + '1/1?AUCNG_DE=' + str(day)

    req = requests.get(myurl)
    soup = BeautifulSoup(req.text, 'html.parser')

    # Read the tag text directly instead of the original brittle
    # str(tag)[10:-11] slicing of the serialized element.
    product_num = int(soup.find('totalcnt').string)
    print(day, ':', product_num)
    return product_num
70 | + | ||
# Driver: crawl one CSV per day over the inclusive range 2020-05-01..2020-05-05.
# (Removed the unused counter `i` and the start-one-day-early/pre-increment
# trick in favor of a plain inclusive loop.)
first_day = datetime.date(2020, 5, 1)
last_day = datetime.date(2020, 5, 5)

day = first_day
while day <= last_day:
    makecsvfile(day.strftime('%Y%m%d'))
    day += datetime.timedelta(days=1)
-
Please register or login to post a comment