Showing 2 changed files with 51 additions and 20 deletions.
import os
import random
import time

from bs4 import BeautifulSoup
from selenium import webdriver
7 | +def sleeptime(): | ||
8 | + rand = random.uniform(1,3) | ||
9 | + time.sleep(rand) | ||
10 | + | ||
11 | + | ||
12 | +login_info = { | ||
13 | + 'userID' : 'qdw0313', | ||
14 | + 'userpw' : 'Uvjbt7G8dsnR6UN' | ||
15 | +} | ||
16 | + | ||
17 | +options = webdriver.ChromeOptions() | ||
18 | +options.add_argument('headless') | ||
19 | +options.add_argument('no-sandbox') | ||
20 | +options.add_argument('window-size=1920x1080') | ||
21 | +# options.add_argument('disable-gpu') | ||
22 | +options.add_argument('disable-dev-shm-usage') | ||
23 | +options.add_argument('lang=ko_KR') | ||
24 | +options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47') | ||
25 | + | ||
26 | +driver = webdriver.Chrome(r'C:\Users\Admin\Desktop\OSS\Todays_Issue\chromedriver.exe', options=options) | ||
3 | 27 | ||
4 | -def GetContent(url): | 28 | +driver.get('about:blank') |
5 | - options = webdriver.ChromeOptions() | 29 | +driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});") |
30 | +sleeptime() | ||
31 | +driver.get('https://everytime.kr/login') | ||
6 | 32 | ||
7 | - options.add_argument('--headless') | 33 | +sleeptime() |
8 | - options.add_argument('--no-sandbox') | 34 | +driver.find_element_by_name('userid').send_keys(login_info['userID']) |
9 | - options.add_argument('window-size=1920x1080') | 35 | +driver.find_element_by_name('password').send_keys(login_info['userpw']) |
36 | +driver.find_element_by_class_name('submit').click() | ||
37 | +sleeptime() | ||
10 | 38 | ||
11 | - driver = webdriver.Chrome(options=options) | ||
12 | 39 | ||
13 | - driver.get(url) | 40 | +# 국제캠 자게 |
14 | - html = driver.page_source | 41 | +driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click() |
15 | - soup = BeautifulSoup(html, 'html.parser') | 42 | +sleeptime() |
16 | 43 | ||
17 | - #naverNews | 44 | +# driver.find_element_by_css_selector('li.none').click() |
18 | - content = soup.select_one('#newsEndContents') | 45 | +html = driver.page_source |
19 | - res = "" | 46 | +soup = BeautifulSoup(html, 'html.parser') |
20 | - for para in content.contents: | 47 | +#container > div.wrap.articles > article:nth-child(2) > a > h2 |
21 | - stripped = str(para).strip() | 48 | +#container > div.wrap.articles > article:nth-child(3) > a > h2 |
22 | - if stripped == "": | 49 | +#find_all |
23 | - continue | 50 | +TitleList = soup.select('#container > div.wrap.articles > article > a > h2') |
24 | - if stripped[0] not in ["<", "/"]: | ||
25 | - res += str(para).strip() | ||
26 | - res.replace("'", "") | ||
27 | 51 | ||
28 | - return res | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
52 | +data = [] | ||
53 | +for title in TitleList: | ||
54 | + data += title | ||
55 | +print(data) | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment