박은주

update

...@@ -7,3 +7,7 @@ ...@@ -7,3 +7,7 @@
7 # Editor-based HTTP Client requests 7 # Editor-based HTTP Client requests
8 /httpRequests/ 8 /httpRequests/
9 /.idea 9 /.idea
10 +
11 +chromedriver.exe
12 +/venv/
13 +/.idea/
...\ No newline at end of file ...\ No newline at end of file
......
1 +import time
2 +
1 from selenium import webdriver 3 from selenium import webdriver
2 from bs4 import BeautifulSoup 4 from bs4 import BeautifulSoup
5 +import random
6 +
def sleeptime(low=1, high=3):
    """Pause for a random duration between *low* and *high* seconds.

    The script calls this between browser actions (page loads, clicks);
    presumably to pace requests -- the original code does not say why.

    Args:
        low: Lower bound of the delay in seconds. Defaults to 1.
        high: Upper bound of the delay in seconds. Defaults to 3.

    Raises:
        ValueError: Propagated from ``time.sleep`` if the drawn delay is
            negative.
    """
    delay = random.uniform(low, high)
    time.sleep(delay)
10 +
11 +
import os

# SECURITY: the account ID and password used to be hard-coded here and were
# committed to version control. They are now read from the environment so
# secrets never live in the source tree. The previously committed password
# should be treated as leaked and rotated.
#
# Required environment variables:
#   EVERYTIME_USER_ID -- login ID typed into the 'userid' form field
#   EVERYTIME_USER_PW -- password typed into the 'password' form field
# A missing variable falls back to an empty string, so the dict keeps the
# same keys and str values as before.
login_info = {
    'userID': os.environ.get('EVERYTIME_USER_ID', ''),
    'userpw': os.environ.get('EVERYTIME_USER_PW', ''),
}
16 +
# Build the Chrome configuration by adding each command-line switch in turn;
# the order of the switches matches the original script.
options = webdriver.ChromeOptions()
for chrome_flag in (
    'headless',
    'no-sandbox',
    'window-size=1920x1080',
    # 'disable-gpu' was commented out in the original and stays disabled.
    'disable-dev-shm-usage',
    'lang=ko_KR',
    'user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47',
):
    options.add_argument(chrome_flag)

# Start Chrome through the chromedriver binary at this fixed local path.
driver = webdriver.Chrome(
    r'C:\Users\Admin\Desktop\OSS\Todays_Issue\chromedriver.exe',
    options=options,
)
3 27
4 -def GetContent(url): 28 +driver.get('about:blank')
5 - options = webdriver.ChromeOptions() 29 +driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});")
30 +sleeptime()
31 +driver.get('https://everytime.kr/login')
6 32
7 - options.add_argument('--headless') 33 +sleeptime()
8 - options.add_argument('--no-sandbox') 34 +driver.find_element_by_name('userid').send_keys(login_info['userID'])
9 - options.add_argument('window-size=1920x1080') 35 +driver.find_element_by_name('password').send_keys(login_info['userpw'])
36 +driver.find_element_by_class_name('submit').click()
37 +sleeptime()
10 38
11 - driver = webdriver.Chrome(options=options)
12 39
13 - driver.get(url) 40 +# 국제캠 자게
14 - html = driver.page_source 41 +driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click()
15 - soup = BeautifulSoup(html, 'html.parser') 42 +sleeptime()
16 43
17 - #naverNews 44 +# driver.find_element_by_css_selector('li.none').click()
18 - content = soup.select_one('#newsEndContents') 45 +html = driver.page_source
19 - res = "" 46 +soup = BeautifulSoup(html, 'html.parser')
20 - for para in content.contents: 47 +#container > div.wrap.articles > article:nth-child(2) > a > h2
21 - stripped = str(para).strip() 48 +#container > div.wrap.articles > article:nth-child(3) > a > h2
22 - if stripped == "": 49 +#find_all
23 - continue 50 +TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
24 - if stripped[0] not in ["<", "/"]:
25 - res += str(para).strip()
26 - res.replace("&apos;", "")
27 51
28 - return res
...\ No newline at end of file ...\ No newline at end of file
# Collect the text of every matched <h2> title element and print the list.
# The previous `data += title` extended the list with the *child nodes* of
# each Tag (iterating a Tag yields its children), so a title containing
# nested markup would have added Tag objects or several fragments instead of
# one string per title. get_text() yields exactly one string per title.
data = [title.get_text() for title in TitleList]
print(data)
...\ No newline at end of file ...\ No newline at end of file
......