Toggle navigation
Toggle navigation
This project
Loading...
Sign in
박은주
/
Todays_Issue
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Ubuntu
2021-06-09 00:51:16 +0000
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
a87d91d69a1354a956f03fed3cfbc1c3c4068594
a87d91d6
1 parent
01c35964
Add UTC setting
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
7 deletions
content.py
content.py
View file @
a87d91d
...
...
@@ -11,6 +11,8 @@ from selenium.common.exceptions import NoSuchElementException
from
hanspell
import
spell_checker
from
bs4
import
BeautifulSoup
from
datetime
import
datetime
,
timedelta
from
pytz
import
timezone
# from pyvirtualdisplay import Display
BASE_DIR
=
os
.
path
.
dirname
(
os
.
path
.
realpath
(
__file__
))
...
...
@@ -32,6 +34,9 @@ def TextPreprocess(text):
return
text
def
GetData
():
# display = Display(visible=0, size=(1920,1080))
# display.start()
login_info
=
{
'userID'
:
'qdw0313'
,
'userpw'
:
'fejUfrQxHWwtcGcP0'
...
...
@@ -47,7 +52,10 @@ def GetData():
options
.
add_argument
(
'user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47'
)
# driver = webdriver.Chrome(r'C:\Users\E_N__\Desktop\chromedriver.exe', options=options)
driver
=
webdriver
.
Chrome
(
BASE_DIR
+
'/chromedriver.exe'
,
options
=
options
)
# driver = webdriver.Chrome(executable_path=BASE_DIR + '/chromedriver.exe', options=options)
driver
=
webdriver
.
Chrome
(
options
=
options
)
utc_patam
=
{
'timezoneId'
:
'Asia/Seoul'
}
driver
.
execute_cdp_cmd
(
'Emulation.setTimezoneOverride'
,
utc_patam
)
driver
.
get
(
'about:blank'
)
driver
.
execute_script
(
"Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
)
...
...
@@ -61,8 +69,12 @@ def GetData():
sleeptime
()
# 국제캠 자게
KST
=
timezone
(
'Asia/Seoul'
)
today
=
datetime
.
utcnow
()
.
astimezone
(
KST
)
# today = datetime.today()
sleeptime
()
yesterday
=
(
datetime
.
today
()
-
timedelta
(
1
))
.
strftime
(
'
%
m/
%
d
%
H:
%
M'
)
yesterday
=
(
today
-
timedelta
(
1
))
.
strftime
(
'
%
m/
%
d
%
H:
%
M'
)
print
(
yesterday
)
swt
=
True
page
=
0
...
...
@@ -85,8 +97,8 @@ def GetData():
html
=
driver
.
page_source
soup
=
BeautifulSoup
(
html
,
'html.parser'
)
TitleList
=
soup
.
select
(
'#container > div.wrap.articles > article > a > h2'
)
DateList
=
soup
.
select
(
'#container > div.wrap.articles > article > a > time'
)
TitleList
=
soup
.
select
(
'#container > div.wrap.articles > article > a > h2'
)
for
post
in
zip
(
TitleList
,
DateList
):
title
=
TextPreprocess
(
post
[
0
]
.
text
)
...
...
@@ -103,13 +115,16 @@ def GetData():
content
=
TextPreprocess
(
content
)
post_df
=
post_df
.
append
(
pd
.
DataFrame
([[
title
,
content
]],
columns
=
[
'title'
,
'content'
]))
print
(
post
)
print
(
content
)
if
post
[
1
]
.
text
<=
yesterday
:
swt
=
False
break
break
print
(
'next page'
)
post_df
.
to_csv
(
'data.csv'
,
mode
=
'w'
,
encoding
=
'utf-8-sig'
,
index
=
False
)
print
(
"CVS file saved"
)
# post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
# print("CVS file saved")
#
# with open('data.json', 'w+', encoding='utf-8-sig') as json_file:
# for post in zip(post_df['title'].tolist(), post_df['content'].tolist()):
# json.dump({
...
...
Please
register
or
log in
to post a comment