Showing 2 changed files with 51 additions and 20 deletions.
import os
import random
import time

from bs4 import BeautifulSoup
from selenium import webdriver
7 | +def sleeptime(): | ||
8 | + rand = random.uniform(1,3) | ||
9 | + time.sleep(rand) | ||
10 | + | ||
11 | + | ||
12 | +login_info = { | ||
13 | + 'userID' : 'qdw0313', | ||
14 | + 'userpw' : 'Uvjbt7G8dsnR6UN' | ||
15 | +} | ||
16 | + | ||
17 | +options = webdriver.ChromeOptions() | ||
18 | +options.add_argument('headless') | ||
19 | +options.add_argument('no-sandbox') | ||
20 | +options.add_argument('window-size=1920x1080') | ||
21 | +# options.add_argument('disable-gpu') | ||
22 | +options.add_argument('disable-dev-shm-usage') | ||
23 | +options.add_argument('lang=ko_KR') | ||
24 | +options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47') | ||
25 | + | ||
26 | +driver = webdriver.Chrome(r'C:\Users\Admin\Desktop\OSS\Todays_Issue\chromedriver.exe', options=options) | ||
3 | 27 | ||
4 | -def GetContent(url): | 28 | +driver.get('about:blank') |
5 | - options = webdriver.ChromeOptions() | 29 | +driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});") |
30 | +sleeptime() | ||
31 | +driver.get('https://everytime.kr/login') | ||
6 | 32 | ||
7 | - options.add_argument('--headless') | 33 | +sleeptime() |
8 | - options.add_argument('--no-sandbox') | 34 | +driver.find_element_by_name('userid').send_keys(login_info['userID']) |
9 | - options.add_argument('window-size=1920x1080') | 35 | +driver.find_element_by_name('password').send_keys(login_info['userpw']) |
36 | +driver.find_element_by_class_name('submit').click() | ||
37 | +sleeptime() | ||
10 | 38 | ||
11 | - driver = webdriver.Chrome(options=options) | ||
12 | 39 | ||
13 | - driver.get(url) | 40 | +# 국제캠 자게 |
14 | - html = driver.page_source | 41 | +driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click() |
15 | - soup = BeautifulSoup(html, 'html.parser') | 42 | +sleeptime() |
16 | 43 | ||
17 | - #naverNews | 44 | +# driver.find_element_by_css_selector('li.none').click() |
18 | - content = soup.select_one('#newsEndContents') | 45 | +html = driver.page_source |
19 | - res = "" | 46 | +soup = BeautifulSoup(html, 'html.parser') |
20 | - for para in content.contents: | 47 | +#container > div.wrap.articles > article:nth-child(2) > a > h2 |
21 | - stripped = str(para).strip() | 48 | +#container > div.wrap.articles > article:nth-child(3) > a > h2 |
22 | - if stripped == "": | 49 | +#find_all |
23 | - continue | 50 | +TitleList = soup.select('#container > div.wrap.articles > article > a > h2') |
24 | - if stripped[0] not in ["<", "/"]: | ||
25 | - res += str(para).strip() | ||
26 | - res.replace("'", "") | ||
27 | 51 | ||
28 | - return res | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
52 | +data = [] | ||
53 | +for title in TitleList: | ||
54 | + data += title | ||
55 | +print(data) | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment