Toggle navigation
Toggle navigation
This project
Loading...
Sign in
박은주
/
Todays_Issue
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
박은주
2021-05-31 17:33:03 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
03de454241356aa9ee6ae6474b970e61ad5e0cf1
03de4542
1 parent
89eccbf9
update
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
52 additions
and
21 deletions
.gitignore
content.py
.gitignore
View file @
03de454
...
...
@@ -6,4 +6,8 @@
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
/.idea
\ No newline at end of file
/.idea
chromedriver.exe
/venv/
/.idea/
\ No newline at end of file
...
...
content.py
View file @
03de454
import os
import random
import time

from bs4 import BeautifulSoup
from selenium import webdriver
def sleeptime(low: float = 1, high: float = 3) -> None:
    """Pause for a random duration between browser actions.

    The pause length is drawn with ``random.uniform(low, high)`` and handed
    to ``time.sleep``.

    Args:
        low: Lower bound of the pause in seconds. Defaults to 1.
        high: Upper bound of the pause in seconds. Defaults to 3.
    """
    time.sleep(random.uniform(low, high))
# Credentials typed into the everytime.kr login form. They are read from the
# environment so that no secret lives in the repository; the values that used
# to be hard-coded here should be treated as leaked and rotated.
try:
    login_info = {
        'userID': os.environ['EVERYTIME_USER_ID'],
        'userpw': os.environ['EVERYTIME_USER_PW'],
    }
except KeyError as err:
    raise RuntimeError(
        'Set the EVERYTIME_USER_ID and EVERYTIME_USER_PW environment '
        'variables before running this script.'
    ) from err

# Chrome start-up flags for the single browser session shared by this module.
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('no-sandbox')
# NOTE(review): Chrome documents this switch as "width,height"; confirm that
# the "1920x1080" spelling is actually honoured.
options.add_argument('window-size=1920x1080')
# options.add_argument('disable-gpu')
options.add_argument('disable-dev-shm-usage')
options.add_argument('lang=ko_KR')
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47')

# Location of the chromedriver binary. CHROMEDRIVER_PATH overrides the
# original developer-machine path, which is kept as the fallback.
chromedriver_path = os.environ.get(
    'CHROMEDRIVER_PATH',
    r'C:\Users\Admin\Desktop\OSS\Todays_Issue\chromedriver.exe',
)
driver = webdriver.Chrome(chromedriver_path, options=options)
def
GetContent
(
url
):
options
=
webdriver
.
ChromeOptions
()
driver
.
get
(
'about:blank'
)
driver
.
execute_script
(
"Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
)
sleeptime
()
driver
.
get
(
'https://everytime.kr/login'
)
options
.
add_argument
(
'--headless'
)
options
.
add_argument
(
'--no-sandbox'
)
options
.
add_argument
(
'window-size=1920x1080'
)
sleeptime
()
driver
.
find_element_by_name
(
'userid'
)
.
send_keys
(
login_info
[
'userID'
])
driver
.
find_element_by_name
(
'password'
)
.
send_keys
(
login_info
[
'userpw'
])
driver
.
find_element_by_class_name
(
'submit'
)
.
click
()
sleeptime
()
driver
=
webdriver
.
Chrome
(
options
=
options
)
driver
.
get
(
url
)
html
=
driver
.
page_source
soup
=
BeautifulSoup
(
html
,
'html.parser'
)
# 국제캠 자게
driver
.
find_element_by_xpath
(
'//*[@id="submenu"]/div/div[2]/ul/li[1]/a'
)
.
click
()
sleeptime
(
)
#naverNews
content
=
soup
.
select_one
(
'#newsEndContents'
)
res
=
""
for
para
in
content
.
contents
:
stripped
=
str
(
para
)
.
strip
()
if
stripped
==
""
:
continue
if
stripped
[
0
]
not
in
[
"<"
,
"/"
]:
res
+=
str
(
para
)
.
strip
()
res
.
replace
(
"'"
,
""
)
# driver.find_element_by_css_selector('li.none').click()
html
=
driver
.
page_source
soup
=
BeautifulSoup
(
html
,
'html.parser'
)
#container > div.wrap.articles > article:nth-child(2) > a > h2
#container > div.wrap.articles > article:nth-child(3) > a > h2
#find_all
TitleList
=
soup
.
select
(
'#container > div.wrap.articles > article > a > h2'
)
return
res
\ No newline at end of file
data
=
[]
for
title
in
TitleList
:
data
+=
title
print
(
data
)
\ No newline at end of file
...
...
Please
register
or
login
to post a comment