Add file via upload

박은주
Commit b17f3f7640858c4a6781a25b81d7e07f3dc63f37 b17f3f76 1 parent b29f9e71
Showing 1 changed file with 29 additions and 20 deletions
content.py
--- a/content.py
View file @b17f3f7
+++ b/content.py
View file @b17f3f7
@@ -2,6 +2,7 @@ import csv
 import time
 import random
 import os
+ import pandas as pd
 
 from selenium import webdriver
 from bs4 import BeautifulSoup
@@ -11,14 +12,18 @@ def sleeptime():
     rand = random.uniform(1,3)
     time.sleep(rand)
 
+ def Click(xpath):
+     element = driver.find_element_by_xpath(xpath)
+     driver.execute_script("arguments[0].click();", element)
+     sleeptime()
 
 login_info = {
     'userID' : 'id',
-     'userpw' : 'passwd'
+     'userpw' : 'pw'
 }
 
 options = webdriver.ChromeOptions()
- options.add_argument('headless')
+ # options.add_argument('headless')
 options.add_argument('no-sandbox')
 options.add_argument('window-size=1920x1080')
 options.add_argument('disable-gpu')
@@ -26,7 +31,7 @@ options.add_argument('disable-dev-shm-usage')
 options.add_argument('lang=ko_KR')
 options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47')
 
- driver = webdriver.Chrome(r'C:\Users\Admin\Desktop\OSS\Todays_Issue\chromedriver.exe', options=options)
+ driver = webdriver.Chrome(r'C:\Users\E_N__\Desktop\chromedriver.exe', options=options)
 
 driver.get('about:blank')
 driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});")
@@ -46,39 +51,43 @@ yesterday = (datetime.today() - timedelta(1)).strftime('%m/%d %H:%M')
 swt = True
 page = 1
 
+ post_df = pd.DataFrame(columns=['title', 'content'])
 while swt:
     if not posts:
-         driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click()
+         Click('//*[@id="submenu"]/div/div[2]/ul/li[1]/a')
     else:
         if page == 1:
-             driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a').click()
+             Click('//*[@id="container"]/div[2]/div[2]/a')
             page += 1
         elif page == 2:
-             element = driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a[2]')
-             driver.execute_script("arguments[0].click();", element)
-             sleeptime()
+             Click('//*[@id="container"]/div[2]/div[2]/a[2]')
             page += 1
         else:
-             element = driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a[3]')
-             driver.execute_script("arguments[0].click();", element)
-             sleeptime()
+             Click('//*[@id="container"]/div[2]/div[2]/a[3]')
 
-     sleeptime()
     html = driver.page_source
     soup = BeautifulSoup(html, 'html.parser')
 
     TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
-     ContentList = soup.select('#container > div.wrap.articles > article > a > p')
     DateList = soup.select('#container > div.wrap.articles > article > a > time')
+     ContentList = soup.select('#container > div.wrap.articles > article > a > p')
 
+     idx = 1
     for post in zip(TitleList, ContentList, DateList):
-         posts.append([post[0].text, post[1].text])
-         if post[2].text == yesterday:
+         Click('//*[@id="container"]/div[2]/article[{}]'.format(idx))
+         content = driver.find_element_by_xpath('//*[@id="container"]/div[2]/article/a/p').text
+         sleeptime()
+         idx += 1
+ 
+         post_df = post_df.append(pd.DataFrame([post[0].text, content],
+                                               columns=['title', 'content']))
+         if post[2].text < yesterday:
             swt = False
             break
 
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- with open(os.path.join(BASE_DIR + '/', 'data.csv'), 'w+', encoding='utf-8-sig', newline='') as file:
-     writer = csv.writer(file)
-     for idx in range(len(posts)):
-         writer.writerow(posts[idx])
\ No newline at end of file
+ post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig')
+ # BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+ # with open(os.path.join(BASE_DIR + '/', 'data.csv'), 'w+', encoding='utf-8-sig', newline='') as file:
+ #     writer = csv.writer(file)
+ #     for idx in range(len(posts)):
+ #         writer.writerow(posts[idx])
\ No newline at end of file