Toggle navigation
Toggle navigation
This project
Loading...
Sign in
2020-1-capstone-design2
/
2015101793
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
양선아
2020-05-06 18:24:02 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
16ccdb0d96de61ea54a5c07adf0dcd41a13a94a9
16ccdb0d
1 parent
1e7682ec
python file for data crawling
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 92 additions and 5 deletions.
data/auctiondatacraw.py
data/distributiondata.py
data/jointmarketdata.py
data/auctiondatacraw.py
View file @
16ccdb0
...
...
@@ -7,7 +7,7 @@ import requests
import
os
import
datetime
url
=
'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_201
41119000000000012
_1/'
url
=
'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_201
60624000000000348
_1/'
def
makecsvfile
(
day
):
...
...
@@ -15,7 +15,7 @@ def makecsvfile(day):
output_file
=
str
(
day
)
+
'data.csv'
# dataframe 생성
df
=
pd
.
DataFrame
(
columns
=
[
'row_num'
,
'aucng_de'
,
'pblmng_whsal_mrkt_nm'
,
'pblmng_whsal_mrkt_cd'
,
'cpr_nm'
,
'cpr_cd'
,
'prdlst_nm'
,
'prdlst_cd'
,
'spcies_nm'
,
'spcies_cd'
,
'
grad'
,
'grad_cd'
,
'delngbundle_qy'
,
'stndrd'
,
'stndrd_cd'
,
'delng_qy'
,
'mumm_amt'
,
'avrg_amt'
,
'mxmm_amt'
,
'auc_co
'
])
df
=
pd
.
DataFrame
(
columns
=
[
'row_num'
,
'aucng_de'
,
'pblmng_whsal_mrkt_nm'
,
'pblmng_whsal_mrkt_cd'
,
'cpr_nm'
,
'cpr_cd'
,
'prdlst_nm'
,
'prdlst_cd'
,
'spcies_nm'
,
'spcies_cd'
,
'
delngbundle_qy'
,
'stndrd'
,
'stndrd_cd'
,
'grad'
,
'grad_cd'
,
'sanji_cd'
,
'sanji_nm'
,
'mumm_amt'
,
'avrg_amt'
,
'mxmm_amt'
,
'delng_qy'
,
'cnts
'
])
# 기본 number setting
i
=
0
# 날짜별 row
...
...
@@ -46,7 +46,7 @@ def makecsvfile(day):
# if result is fine
items
=
soup
.
find_all
(
'row'
)
for
item
in
items
:
df
.
loc
[
i
]
=
[
item
.
row_num
.
string
,
item
.
aucng_de
.
string
,
item
.
pblmng_whsal_mrkt_nm
.
string
,
item
.
pblmng_whsal_mrkt_cd
.
string
,
item
.
cpr_nm
.
string
,
item
.
cpr_cd
.
string
,
item
.
prdlst_nm
.
string
,
item
.
prdlst_cd
.
string
,
item
.
spcies_nm
.
string
,
item
.
spcies_cd
.
string
,
item
.
grad
.
string
,
item
.
grad_cd
.
string
,
item
.
delngbundle_qy
.
string
,
item
.
stndrd
.
string
,
item
.
stndrd_cd
.
string
,
item
.
delng_qy
.
string
,
item
.
mumm_amt
.
string
,
item
.
avrg_amt
.
string
,
item
.
mxmm_amt
.
string
,
item
.
auc_co
.
string
]
df
.
loc
[
i
]
=
[
item
.
row_num
.
string
,
item
.
aucng_de
.
string
,
item
.
pblmng_whsal_mrkt_nm
.
string
,
item
.
pblmng_whsal_mrkt_cd
.
string
,
item
.
cpr_nm
.
string
,
item
.
cpr_cd
.
string
,
item
.
prdlst_nm
.
string
,
item
.
prdlst_cd
.
string
,
item
.
spcies_nm
.
string
,
item
.
spcies_cd
.
string
,
item
.
delngbundle_qy
.
string
,
item
.
stndrd
.
string
,
item
.
stndrd_cd
.
string
,
item
.
grad
.
string
,
item
.
grad_cd
.
string
,
item
.
mumm_amt
.
string
,
item
.
avrg_amt
.
string
,
item
.
mxmm_amt
.
string
,
item
.
delng_qy
.
string
,
item
.
cnts
.
string
]
i
+=
1
# 다음 1000개
...
...
data/distributiondata.py
0 → 100644
View file @
16ccdb0
# -*- coding: utf-8 -*-
from
bs4
import
BeautifulSoup
from
urllib.request
import
urlopen
import
pandas
as
pd
import
requests
import
os
import
datetime
# Open-API endpoint base URL; the Grid id selects the dataset
# (presumably the distribution-price grid — confirm against the API catalog).
url = 'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160722000000000352_1/'
# Generic crawler: only the column list / tag accessors need changing per API grid.
def makecsvfile(day):
    """Fetch one day's distribution-price records from the open API and save
    them as '<day>data.csv'.

    day: date key appended to the request URL (formatted like 'YYYYMMDD').
    Side effects: issues HTTP requests, prints progress, writes a CSV file.
    Returns nothing.
    """
    # file name setting
    output_file = str(day) + 'data.csv'

    # DataFrame creation.
    # FIX: the original had '.' instead of ',' between 'EXAMIN_MRKT_NM' and
    # 'EXAMIN_MRKT_CODE', which is a SyntaxError.
    df = pd.DataFrame(columns=[
        'ROW_NUM', 'EXAMIN_DE', 'EXAMIN_SE_NM', 'EXAMIN_SE_CODE',
        'EXAMIN_AREA_NAME', 'EXAMIN_AREA_CODE', 'EXAMIN_MRKT_NM', 'EXAMIN_MRKT_CODE',
        'STD_MRKT_NM', 'STD_MRKT_CODE', 'EXAMIN_PRDLST_NM', 'EXAMIN_PRDLST_CODE',
        'EXAMIN_SPCIES_NM', 'EXAMIN_SPCIES_CODE', 'STD_LCLAS_NM', 'STD_LCLAS_CO',
        'STD_PRDLST_NM', 'STD_PRDLST_CODE', 'STD_SPCIES_NM', 'STD_SPCIES_CODE',
        'EXAMIN_UNIT_NM', 'EXAMIN_UNIT', 'STD_UNIT_NM', 'STD_UNIT_CODE',
        'EXAMIN_GRAD_NM', 'EXAMIN_GRAD_CODE', 'STD_GRAD_NM', 'STD_GRAD_CODE',
        'TODAY_PRIC', 'BFRT_PRIC', 'IMP_TRADE', 'TRADE_AMT',
    ])

    i = 0        # next DataFrame row index
    number = 0   # page counter; the API serves pages of 1000 records

    while True:
        # Build the paged request URL: rows number*1000+1 .. (number+1)*1000.
        myurl = (url + str(number * 1000 + 1) + '/' + str((number + 1) * 1000)
                 + '?AUCNG_DE=' + str(day))
        data = urlopen(myurl).read()
        soup = BeautifulSoup(data, 'html.parser')

        # data error check: the API reports 'INFO-000' on success.
        result_code = soup.find('result')
        result_code = result_code.code.string
        if result_code != 'INFO-000':
            print(result_code)
            break

        # data number check: slicing strips the literal '<startrow>'/'</startrow>'
        # (and '<totalcnt>'/'</totalcnt>') markup from str(tag).
        # NOTE(review): brittle — depends on the exact tag-name lengths.
        start_num = int(str(soup.find('startrow'))[10:-11])
        total_num = int(str(soup.find('totalcnt'))[10:-11])
        print(str(soup.find('startrow'))[10:-11])
        if total_num < start_num:
            print('find all')
            break

        # if result is fine, append every <row> element.
        # FIX: html.parser lowercases all tag names, so the original uppercase
        # accessors (item.EXAMIN_DE, ...) found nothing (None) and crashed on
        # .string; also fixed the 'itme' typo -> 'item'.
        items = soup.find_all('row')
        for item in items:
            df.loc[i] = [
                item.row_num.string, item.examin_de.string,
                item.examin_se_nm.string, item.examin_se_code.string,
                item.examin_area_name.string, item.examin_area_code.string,
                item.examin_mrkt_nm.string, item.examin_mrkt_code.string,
                item.std_mrkt_nm.string, item.std_mrkt_code.string,
                item.examin_prdlst_nm.string, item.examin_prdlst_code.string,
                item.examin_spcies_nm.string, item.examin_spcies_code.string,
                item.std_lclas_nm.string, item.std_lclas_co.string,
                item.std_prdlst_nm.string, item.std_prdlst_code.string,
                item.std_spcies_nm.string, item.std_spcies_code.string,
                item.examin_unit_nm.string, item.examin_unit.string,
                item.std_unit_nm.string, item.std_unit_code.string,
                item.examin_grad_nm.string, item.examin_grad_code.string,
                item.std_grad_nm.string, item.std_grad_code.string,
                item.today_pric.string, item.bfrt_pric.string,
                item.imp_trade.string, item.trade_amt.string,
            ]
            i += 1
        # next 1000 records
        number += 1

    # progress print
    print(str(day), ' : ', str(i))
    # Export to CSV.
    # NOTE(review): './jointmarketdata' looks copy-pasted from the joint-market
    # crawler — probably should be a distribution-data directory; confirm.
    df.to_csv(os.path.join('./jointmarketdata', output_file),
              encoding='euc-kr', index=False)
def checkdatanum(day):
    """Ask the API for a single record and return the reported total
    record count (<totalcnt>) for *day*."""
    query_url = url + '1/1?AUCNG_DE=' + str(day)
    response = requests.get(query_url)
    parsed = BeautifulSoup(response.text, 'html.parser')
    totalcnt_tag = parsed.find('totalcnt')
    # Slice off the literal '<totalcnt>' / '</totalcnt>' markup around the number.
    count = int(str(totalcnt_tag)[10:-11])
    print(day, ':', count)
    return count
# Module-level counter (unused by the loop below; each crawl keeps its own).
i = 0

##################################### set the start & end dates here ############################################
last_day = datetime.date(2020, 4, 30)
first_day = datetime.date(2020, 4, 15) - datetime.timedelta(days=1)

one_day = datetime.timedelta(days=1)
# Crawl every day from 2020-04-15 through 2020-04-30 inclusive,
# passing the date to makecsvfile as a 'YYYYMMDD' string.
while first_day < last_day:
    first_day += one_day
    makecsvfile(first_day.strftime('%Y%m%d'))
data/jointmarketdata.py
View file @
16ccdb0
...
...
@@ -9,12 +9,14 @@ import datetime
url
=
'http://211.237.50.150:7080/openapi/5e5e94876b673efe7161d3240516d65476da16210a391a9d6f31224c54a1fdaf/xml/Grid_20160624000000000349_1/'
# 원하는 사이트에 맞게 항목만 바꿔주면 되는 함수
def
makecsvfile
(
day
):
# file name setting
output_file
=
str
(
day
)
+
'data.csv'
# dataframe 생성
########################## data column 변경 필수 ################################
df
=
pd
.
DataFrame
(
columns
=
[
'row_num'
,
'aucng_de'
,
'cpr_nm'
,
'cpr_cd'
,
'cpr_type_nm'
,
'cpr_type_cd'
,
'prdlst_nm'
,
'prdlst_cd'
,
'spcies_nm'
,
'spcies_cd'
,
'delngbundle_qy'
,
'stndrd'
,
'stndrd_cd'
,
'grad'
,
'grad_cd'
,
'sanco'
,
'sannm'
,
'mumm_amt'
,
'avrg_amt'
,
'mxmm_amt'
,
'delng_qy'
,
'auc_co'
])
# 기본 number setting
...
...
@@ -46,6 +48,7 @@ def makecsvfile(day):
# if result is fine
items
=
soup
.
find_all
(
'row'
)
for
item
in
items
:
########################################## data column change ##########################################
df
.
loc
[
i
]
=
[
item
.
row_num
.
string
,
item
.
aucng_de
.
string
,
item
.
cpr_nm
.
string
,
item
.
cpr_cd
.
string
,
item
.
cpr_type_nm
.
string
,
item
.
cpr_type_cd
.
string
,
item
.
prdlst_nm
.
string
,
item
.
prdlst_cd
.
string
,
item
.
spcies_nm
.
string
,
item
.
spcies_cd
.
string
,
item
.
delngbundle_qy
.
string
,
item
.
stndrd
.
string
,
item
.
stndrd_cd
.
string
,
item
.
grad
.
string
,
item
.
grad_cd
.
string
,
item
.
sanco
.
string
,
item
.
sannm
.
string
,
item
.
mumm_amt
.
string
,
item
.
avrg_amt
.
string
,
item
.
mxmm_amt
.
string
,
item
.
delng_qy
.
string
,
item
.
auc_co
.
string
]
i
+=
1
...
...
@@ -55,6 +58,7 @@ def makecsvfile(day):
# 결과 확인을 위한 print
print
(
str
(
day
),
' : '
,
str
(
i
))
# csv 파일로 내보내기
############################################# change saved file directory ####################################
df
.
to_csv
(
os
.
path
.
join
(
'./jointmarketdata'
,
output_file
),
encoding
=
'euc-kr'
,
index
=
False
)
def
checkdatanum
(
day
):
...
...
@@ -69,8 +73,9 @@ def checkdatanum(day):
return
product_num
i
=
0
last_day
=
datetime
.
date
(
2020
,
5
,
5
)
first_day
=
datetime
.
date
(
2020
,
5
,
1
)
-
datetime
.
timedelta
(
days
=
1
)
##################################### 시작일 & 종료일 변경 ############################################
last_day
=
datetime
.
date
(
2020
,
4
,
30
)
first_day
=
datetime
.
date
(
2020
,
4
,
15
)
-
datetime
.
timedelta
(
days
=
1
)
while
(
first_day
<
last_day
):
first_day
+=
datetime
.
timedelta
(
days
=
1
)
...
...
Please register or log in to post a comment.