Showing 3 changed files with 92 additions and 5 deletions
... | @@ -7,7 +7,7 @@ import requests | ... | @@ -7,7 +7,7 @@ import requests |
7 | import os | 7 | import os |
8 | import datetime | 8 | import datetime |
9 | 9 | ||
10 | -url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20141119000000000012_1/' | 10 | +url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160624000000000348_1/' |
11 | 11 | ||
12 | def makecsvfile(day): | 12 | def makecsvfile(day): |
13 | 13 | ||
... | @@ -15,7 +15,7 @@ def makecsvfile(day): | ... | @@ -15,7 +15,7 @@ def makecsvfile(day): |
15 | output_file = str(day)+'data.csv' | 15 | output_file = str(day)+'data.csv' |
16 | 16 | ||
17 | # dataframe 생성 | 17 | # dataframe 생성 |
18 | - df = pd.DataFrame(columns=['row_num','aucng_de', 'pblmng_whsal_mrkt_nm','pblmng_whsal_mrkt_cd', 'cpr_nm', 'cpr_cd', 'prdlst_nm', 'prdlst_cd', 'spcies_nm','spcies_cd','grad','grad_cd','delngbundle_qy','stndrd','stndrd_cd','delng_qy','mumm_amt','avrg_amt','mxmm_amt','auc_co']) | 18 | + df = pd.DataFrame(columns=['row_num','aucng_de', 'pblmng_whsal_mrkt_nm','pblmng_whsal_mrkt_cd', 'cpr_nm', 'cpr_cd', 'prdlst_nm', 'prdlst_cd', 'spcies_nm','spcies_cd','delngbundle_qy','stndrd','stndrd_cd','grad','grad_cd','sanji_cd','sanji_nm','mumm_amt','avrg_amt','mxmm_amt','delng_qy','cnts']) |
19 | 19 | ||
20 | # 기본 number setting | 20 | # 기본 number setting |
21 | i = 0 # 날짜별 row | 21 | i = 0 # 날짜별 row |
... | @@ -46,7 +46,7 @@ def makecsvfile(day): | ... | @@ -46,7 +46,7 @@ def makecsvfile(day): |
46 | # if result is fine | 46 | # if result is fine |
47 | items = soup.find_all('row') | 47 | items = soup.find_all('row') |
48 | for item in items: | 48 | for item in items: |
49 | - df.loc[i] = [item.row_num.string, item.aucng_de.string, item.pblmng_whsal_mrkt_nm.string, item.pblmng_whsal_mrkt_cd.string, item.cpr_nm.string, item.cpr_cd.string, item.prdlst_nm.string, item.prdlst_cd.string, item.spcies_nm.string, item.spcies_cd.string, item.grad.string, item.grad_cd.string, item.delngbundle_qy.string, item.stndrd.string, item.stndrd_cd.string, item.delng_qy.string, item.mumm_amt.string, item.avrg_amt.string, item.mxmm_amt.string, item.auc_co.string] | 49 | + df.loc[i] = [item.row_num.string, item.aucng_de.string, item.pblmng_whsal_mrkt_nm.string, item.pblmng_whsal_mrkt_cd.string, item.cpr_nm.string, item.cpr_cd.string, item.prdlst_nm.string, item.prdlst_cd.string, item.spcies_nm.string, item.spcies_cd.string, item.delngbundle_qy.string, item.stndrd.string, item.stndrd_cd.string, item.grad.string, item.grad_cd.string, item.mumm_amt.string, item.avrg_amt.string, item.mxmm_amt.string, item.delng_qy.string, item.cnts.string] |
50 | i += 1 | 50 | i += 1 |
51 | 51 | ||
52 | # 다음 1000개 | 52 | # 다음 1000개 | ... | ... |
data/distributiondata.py
0 → 100644
1 | +# -*- coding: utf-8 -*- | ||
2 | + | ||
3 | +from bs4 import BeautifulSoup | ||
4 | +from urllib.request import urlopen | ||
5 | +import pandas as pd | ||
6 | +import requests | ||
7 | +import os | ||
8 | +import datetime | ||
9 | + | ||
# Base endpoint of the distribution-data grid (Grid_20160722000000000352_1) on
# the Korean agri-food open-API server.  makecsvfile()/checkdatanum() append
# the requested row range and the date filter to this URL.
url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160722000000000352_1/'

# Function that works for any similar open-API grid: only the column list
# (and the save directory) needs to be changed per site.
def makecsvfile(day):
    """Fetch every <row> record for *day* from the open API and save a CSV.

    Pages through the API 1000 records at a time and writes the collected
    rows to '<day>data.csv' (euc-kr encoded) under ./jointmarketdata.

    day: date string in YYYYMMDD form (as produced by strftime('%Y%m%d')).
    """
    # file name setting
    output_file = str(day) + 'data.csv'

    ########################## data columns: change per grid ################
    # Single source of truth for both the CSV header and the XML tag names.
    # (The original literal had a '.' instead of ',' between EXAMIN_MRKT_NM
    # and EXAMIN_MRKT_CODE — a SyntaxError — fixed here.)
    columns = ['ROW_NUM', 'EXAMIN_DE', 'EXAMIN_SE_NM', 'EXAMIN_SE_CODE',
               'EXAMIN_AREA_NAME', 'EXAMIN_AREA_CODE', 'EXAMIN_MRKT_NM',
               'EXAMIN_MRKT_CODE', 'STD_MRKT_NM', 'STD_MRKT_CODE',
               'EXAMIN_PRDLST_NM', 'EXAMIN_PRDLST_CODE', 'EXAMIN_SPCIES_NM',
               'EXAMIN_SPCIES_CODE', 'STD_LCLAS_NM', 'STD_LCLAS_CO',
               'STD_PRDLST_NM', 'STD_PRDLST_CODE', 'STD_SPCIES_NM',
               'STD_SPCIES_CODE', 'EXAMIN_UNIT_NM', 'EXAMIN_UNIT',
               'STD_UNIT_NM', 'STD_UNIT_CODE', 'EXAMIN_GRAD_NM',
               'EXAMIN_GRAD_CODE', 'STD_GRAD_NM', 'STD_GRAD_CODE',
               'TODAY_PRIC', 'BFRT_PRIC', 'IMP_TRADE', 'TRADE_AMT']
    df = pd.DataFrame(columns=columns)

    # basic counters
    i = 0       # absolute row index across all pages
    number = 0  # page counter (1000 records per page)

    while True:
        # Build the paged request URL: <url><start>/<end>?AUCNG_DE=<day>
        # NOTE(review): the filter name AUCNG_DE was copied from the auction
        # grid scripts; confirm this grid is not filtered by EXAMIN_DE.
        myurl = url + str(number * 1000 + 1) + '/' + str((number + 1) * 1000) + '?AUCNG_DE=' + str(day)
        data = urlopen(myurl).read()
        soup = BeautifulSoup(data, 'html.parser')

        # data error check: the API reports 'INFO-000' on success.
        result_code = soup.find('result').code.string
        if result_code != 'INFO-000':
            print(result_code)
            break

        # data number check: parse counts from the tag text (robust, unlike
        # slicing the raw '<startrow>N</startrow>' string).
        start_num = int(soup.find('startrow').string)
        total_num = int(soup.find('totalcnt').string)
        print(start_num)
        if total_num < start_num:
            print('find all')
            break

        # if result is fine, append every <row> of this page.
        # html.parser lower-cases tag names, so the fields must be looked up
        # with lower-cased names (the original item.EXAMIN_DE-style access
        # could never match, and 'itme' was a typo for 'item').
        for item in soup.find_all('row'):
            df.loc[i] = [item.find(col.lower()).string for col in columns]
            i += 1

        # next 1000 records
        number += 1

    # progress report
    print(str(day), ' : ', str(i))
    ##################### change saved file directory here ##################
    df.to_csv(os.path.join('./jointmarketdata', output_file), encoding='euc-kr', index=False)

def checkdatanum(day):
    """Return the total record count the API reports for *day*.

    Issues a minimal 1-row request and reads the <totalcnt> tag.

    day: date string in YYYYMMDD form.
    """
    myurl = url + '1/1?AUCNG_DE=' + str(day)

    req = requests.get(myurl)
    soup = BeautifulSoup(req.text, 'html.parser')
    # Parse the count from the tag's text instead of slicing the raw
    # '<totalcnt>N</totalcnt>' string, which breaks on any format change.
    product_num = int(soup.find('totalcnt').string)
    print(day, ':', product_num)
    return product_num

74 | + | ||
##################### crawl range: change start & end dates here ##############
last_day = datetime.date(2020, 4, 30)
# Start one day *before* the real first day because the loop pre-increments.
first_day = datetime.date(2020, 4, 15) - datetime.timedelta(days=1)

# Produce one CSV per day in [2020-04-15, 2020-04-30].
# (The dead 'i = 0' of the original was dropped: makecsvfile uses its own
# local counter.)
while first_day < last_day:
    first_day += datetime.timedelta(days=1)
    makecsvfile(first_day.strftime('%Y%m%d'))
... | @@ -9,12 +9,14 @@ import datetime | ... | @@ -9,12 +9,14 @@ import datetime |
9 | 9 | ||
10 | url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160624000000000349_1/' | 10 | url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160624000000000349_1/' |
11 | 11 | ||
12 | +# 원하는 사이트에 맞게 항목만 바꿔주면 되는 함수 | ||
12 | def makecsvfile(day): | 13 | def makecsvfile(day): |
13 | 14 | ||
14 | # file name setting | 15 | # file name setting |
15 | output_file = str(day)+'data.csv' | 16 | output_file = str(day)+'data.csv' |
16 | 17 | ||
17 | # dataframe 생성 | 18 | # dataframe 생성 |
19 | + ########################## data column 변경 필수 ################################ | ||
18 | df = pd.DataFrame(columns=['row_num','aucng_de','cpr_nm','cpr_cd','cpr_type_nm','cpr_type_cd','prdlst_nm','prdlst_cd','spcies_nm','spcies_cd','delngbundle_qy','stndrd','stndrd_cd','grad','grad_cd','sanco','sannm','mumm_amt','avrg_amt','mxmm_amt','delng_qy','auc_co']) | 20 | df = pd.DataFrame(columns=['row_num','aucng_de','cpr_nm','cpr_cd','cpr_type_nm','cpr_type_cd','prdlst_nm','prdlst_cd','spcies_nm','spcies_cd','delngbundle_qy','stndrd','stndrd_cd','grad','grad_cd','sanco','sannm','mumm_amt','avrg_amt','mxmm_amt','delng_qy','auc_co']) |
19 | 21 | ||
20 | # 기본 number setting | 22 | # 기본 number setting |
... | @@ -46,6 +48,7 @@ def makecsvfile(day): | ... | @@ -46,6 +48,7 @@ def makecsvfile(day): |
46 | # if result is fine | 48 | # if result is fine |
47 | items = soup.find_all('row') | 49 | items = soup.find_all('row') |
48 | for item in items: | 50 | for item in items: |
51 | + ########################################## data column change ########################################## | ||
49 | df.loc[i] = [item.row_num.string, item.aucng_de.string, item.cpr_nm.string, item.cpr_cd.string, item.cpr_type_nm.string, item.cpr_type_cd.string, item.prdlst_nm.string, item.prdlst_cd.string, item.spcies_nm.string, item.spcies_cd.string, item.delngbundle_qy.string, item.stndrd.string, item.stndrd_cd.string, item.grad.string, item.grad_cd.string, item.sanco.string, item.sannm.string, item.mumm_amt.string, item.avrg_amt.string, item.mxmm_amt.string, item.delng_qy.string, item.auc_co.string] | 52 | df.loc[i] = [item.row_num.string, item.aucng_de.string, item.cpr_nm.string, item.cpr_cd.string, item.cpr_type_nm.string, item.cpr_type_cd.string, item.prdlst_nm.string, item.prdlst_cd.string, item.spcies_nm.string, item.spcies_cd.string, item.delngbundle_qy.string, item.stndrd.string, item.stndrd_cd.string, item.grad.string, item.grad_cd.string, item.sanco.string, item.sannm.string, item.mumm_amt.string, item.avrg_amt.string, item.mxmm_amt.string, item.delng_qy.string, item.auc_co.string] |
50 | i += 1 | 53 | i += 1 |
51 | 54 | ||
... | @@ -55,6 +58,7 @@ def makecsvfile(day): | ... | @@ -55,6 +58,7 @@ def makecsvfile(day): |
55 | # 결과 확인을 위한 print | 58 | # 결과 확인을 위한 print |
56 | print(str(day), ' : ', str(i)) | 59 | print(str(day), ' : ', str(i)) |
57 | # csv 파일로 내보내기 | 60 | # csv 파일로 내보내기 |
61 | + ############################################# change saved file directory #################################### | ||
58 | df.to_csv(os.path.join('./jointmarketdata', output_file), encoding='euc-kr', index=False) | 62 | df.to_csv(os.path.join('./jointmarketdata', output_file), encoding='euc-kr', index=False) |
59 | 63 | ||
60 | def checkdatanum(day): | 64 | def checkdatanum(day): |
... | @@ -69,8 +73,9 @@ def checkdatanum(day): | ... | @@ -69,8 +73,9 @@ def checkdatanum(day): |
69 | return product_num | 73 | return product_num |
70 | 74 | ||
71 | i = 0 | 75 | i = 0 |
72 | -last_day = datetime.date(2020,5,5) | 76 | +##################################### 시작일 & 종료일 변경 ############################################ |
73 | -first_day = datetime.date(2020,5,1) - datetime.timedelta(days=1) | 77 | +last_day = datetime.date(2020,4,30) |
78 | +first_day = datetime.date(2020,4,15) - datetime.timedelta(days=1) | ||
74 | 79 | ||
75 | while(first_day < last_day): | 80 | while(first_day < last_day): |
76 | first_day += datetime.timedelta(days=1) | 81 | first_day += datetime.timedelta(days=1) | ... | ... |
-
Please register or login to post a comment