Showing 3 changed files with 78 additions and 0 deletions.
data/datacrawling.py
0 → 100644
1 | +# -*- coding: utf-8 -*- | ||
2 | + | ||
3 | +from bs4 import BeautifulSoup | ||
4 | +from urllib.request import urlopen | ||
5 | +import pandas as pd | ||
6 | +import requests | ||
7 | +import os | ||
8 | + | ||
# Base endpoint of the wholesale-market auction open API (XML output).
# Each request appends '<start>/<end>' paging segments and an
# '?AUCNG_DE=<day>' query string — see makecsvfile/checkdatanum below.
url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20141119000000000012_1/'

# Running total of records found across all requested days
# (accumulated by the driver loop at the bottom of the file).
number = 0
def makecsvfile(day):
    """Download all auction records for *day* and write them to '<day>data.csv'.

    The API is paged through '<start>/<end>' path segments, 1000 records per
    request. Paging stops when the result code is not 'INFO-000' (error or no
    more data) or when the requested start row exceeds <totalcnt>.

    Parameters:
        day: date key in YYYYMMDD form (int or str), passed as AUCNG_DE.

    Side effects:
        Writes '<day>data.csv' (EUC-KR encoded) to the current directory and
        prints progress information.
    """
    # Output file name, e.g. '20200413data.csv'.
    output_file = str(day) + 'data.csv'

    # Column order mirrors the API's <row> child elements.
    columns = ['row_num', 'aucng_de', 'pblmng_whsal_mrkt_nm',
               'pblmng_whsal_mrkt_cd', 'cpr_nm', 'cpr_cd', 'prdlst_nm',
               'prdlst_cd', 'spcies_nm', 'spcies_cd', 'grad', 'grad_cd',
               'delngbundle_qy', 'stndrd', 'stndrd_cd', 'delng_qy',
               'mumm_amt', 'avrg_amt', 'mxmm_amt', 'auc_co']

    # Collect rows in a plain list and build the DataFrame once at the end:
    # appending with df.loc[i] inside the loop is quadratic in row count.
    rows = []
    page = 0  # zero-based page index; each page covers 1000 records

    while True:
        # Request records page*1000+1 .. (page+1)*1000 for this day.
        myurl = (url + str(page * 1000 + 1) + '/' + str((page + 1) * 1000)
                 + '?AUCNG_DE=' + str(day))
        # Context manager closes the HTTP response (the original leaked it).
        with urlopen(myurl) as resp:
            data = resp.read()
        soup = BeautifulSoup(data, 'html.parser')

        # Result-code check: anything other than 'INFO-000' means an error
        # or no (more) data; guard against a missing <result> element too.
        result = soup.find('result')
        result_code = result.code.string if result is not None and result.code is not None else None
        if result_code != 'INFO-000':
            print(result_code)
            break

        # Paging check: read the tag text directly instead of slicing the
        # raw '<tag>N</tag>' string, which breaks if the markup changes.
        start_num = int(soup.find('startrow').string)
        total_num = int(soup.find('totalcnt').string)
        print(start_num)
        if total_num < start_num:
            print('find all')
            break

        # Collect every <row> of this page in column order.
        for item in soup.find_all('row'):
            rows.append([item.find(col).string for col in columns])

        # Next page of 1000 records.
        page += 1

    df = pd.DataFrame(rows, columns=columns)
    # Progress print for manual verification.
    print(str(day), ' : ', str(len(df)))
    # EUC-KR so the CSV opens correctly in Korean Excel.
    df.to_csv(os.path.join('./', output_file), encoding='euc-kr', index=False)
60 | + | ||
def checkdatanum(day):
    """Return the total number of auction records available for *day*.

    Fetches a minimal single-record page (rows 1..1) and reads the
    <totalcnt> element from the XML response.

    Parameters:
        day: date key in YYYYMMDD form (int or str), passed as AUCNG_DE.

    Returns:
        int: total record count reported by the API for that day.
    """
    myurl = url + '1/1?AUCNG_DE=' + str(day)

    req = requests.get(myurl)
    soup = BeautifulSoup(req.text, 'html.parser')
    # Parse the tag's text directly instead of slicing the raw
    # '<totalcnt>N</totalcnt>' string, which breaks if the markup changes.
    product_num = int(soup.find('totalcnt').string)
    print(day, ':', product_num)
    return product_num
71 | + | ||
72 | + | ||
# One week of target dates (YYYYMMDD); note 20200412 is listed last.
days = [20200413, 20200414, 20200415, 20200416, 20200417, 20200418, 20200412]

# Accumulate the per-day record counts into the module-level total.
for target_day in days:
    number += checkdatanum(target_day)

print('week : ', number)
~$진행과정.docx
0 → 100644
No preview for this file type
진행과정.docx
0 → 100644
No preview for this file type
-
Please register or login to post a comment