# content.py
import csv
import time
import random
import os

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

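# Pause for a random 1-3 seconds between actions to mimic human browsing.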
def sleeptime():
    rand = random.uniform(1,3)
    time.sleep(rand)


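# Everytime login credentials (placeholder values; replace with a real account).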
login_info = {
    'userID' : 'id',
    'userpw' : 'passwd'
}

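# Headless Chrome options; the fixed window size and desktop user agent help the site render its desktop layout.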
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--window-size=1920x1080')
options.add_argument('--disable-gpu')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--lang=ko_KR')
options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47')

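# Launch chromedriver from a local path (Selenium 4 style: the driver path is passed through a Service object).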
driver = webdriver.Chrome(service=Service(r'C:\Users\Admin\Desktop\OSS\Todays_Issue\chromedriver.exe'), options=options)

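# Spoof navigator.plugins on a blank page so simple headless-detection scripts see a non-empty plugin list.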
driver.get('about:blank')
driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});")
sleeptime()
driver.get('https://everytime.kr/login')

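# Fill in and submit the login form.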
sleeptime()
driver.find_element(By.NAME, 'userid').send_keys(login_info['userID'])
driver.find_element(By.NAME, 'password').send_keys(login_info['userpw'])
driver.find_element(By.CLASS_NAME, 'submit').click()
sleeptime()

# International Campus free board (국제캠 자게)
sleeptime()
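# Collect posts page by page until one dated yesterday appears.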
posts = []
# Older posts on the board are timestamped 'MM/DD HH:MM'; keep only yesterday's date part for the stop check below.
yesterday = (datetime.today() - timedelta(1)).strftime('%m/%d')
swt = True
page = 1

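# The first pass opens the board from the side menu; later passes click what appear to be the
# pagination links (the anchor index in the pager shifts after the first two pages).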
while swt:
    if not posts:
        driver.find_element(By.XPATH, '//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click()
    elif page == 1:
        driver.find_element(By.XPATH, '//*[@id="container"]/div[2]/div[2]/a').click()
        page += 1
    elif page == 2:
        element = driver.find_element(By.XPATH, '//*[@id="container"]/div[2]/div[2]/a[2]')
        driver.execute_script("arguments[0].click();", element)
        sleeptime()
        page += 1
    else:
        element = driver.find_element(By.XPATH, '//*[@id="container"]/div[2]/div[2]/a[3]')
        driver.execute_script("arguments[0].click();", element)
        sleeptime()

    sleeptime()
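    # Parse the rendered page and pull the title, body, and timestamp of each post.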
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
    ContentList = soup.select('#container > div.wrap.articles > article > a > p')
    DateList = soup.select('#container > div.wrap.articles > article > a > time')

    for title, content, date in zip(TitleList, ContentList, DateList):
        posts.append([title.text, content.text])
        # Stop once we reach a post dated yesterday.
        if date.text.startswith(yesterday):
            swt = False
            break

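# Write the collected posts to data.csv next to this script; utf-8-sig adds a BOM so Excel displays Korean text correctly.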
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(BASE_DIR, 'data.csv'), 'w', encoding='utf-8-sig', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(posts)