# content.py
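# Scrape recent posts from an everytime.kr board: log in with Selenium,
# walk the board page by page, open each post for its full body text,
# and save titles and contents to data.csv with pandas.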
import csv
import os
import random
import time
from datetime import datetime, timedelta

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

def sleeptime():
    """Sleep for a random 1-3 seconds to mimic human browsing speed."""
    time.sleep(random.uniform(1, 3))


def click_xpath(xpath):
    """Locate an element by XPath and click it via JavaScript."""
    element = driver.find_element(By.XPATH, xpath)
    driver.execute_script("arguments[0].click();", element)
    sleeptime()

# Placeholder credentials; replace with a real everytime.kr account.
login_info = {
    'userID': 'id',
    'userpw': 'pw',
}
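
# Configure Chrome to look like an ordinary desktop browser
# (fixed window size, Korean locale, spoofed user agent).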
options = webdriver.ChromeOptions()
# options.add_argument('headless')
options.add_argument('no-sandbox')
options.add_argument('window-size=1920x1080')
options.add_argument('disable-gpu')
options.add_argument('disable-dev-shm-usage')
options.add_argument('lang=ko_KR')
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47')

# Selenium 4 style: the chromedriver path goes through a Service object.
driver = webdriver.Chrome(service=Service(r'C:\Users\E_N__\Desktop\chromedriver.exe'), options=options)
driver.get('about:blank')
# Spoof navigator.plugins so naive bot-detection scripts see a non-empty plugin list.
driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return [1, 2, 3, 4, 5];},});")
sleeptime()

# Log in to everytime.kr with the credentials above.
driver.get('https://everytime.kr/login')
sleeptime()
driver.find_element(By.NAME, 'userid').send_keys(login_info['userID'])
driver.find_element(By.NAME, 'password').send_keys(login_info['userpw'])
driver.find_element(By.CLASS_NAME, 'submit').click()
sleeptime()

# International Campus free board
sleeptime()
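# Collect posts page by page, stopping once a post is older than 24 hours.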
yesterday = (datetime.today() - timedelta(1)).strftime('%m/%d %H:%M')
first_page = True
swt = True
page = 1
post_df = pd.DataFrame(columns=['title', 'content'])
while swt:
    if first_page:
        # First pass: open the board from the sidebar menu.
        click_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a')
        first_page = False
    else:
        # Later passes: follow the pagination links under the list.
        if page == 1:
            click_xpath('//*[@id="container"]/div[2]/div[2]/a')
            page += 1
        elif page == 2:
            click_xpath('//*[@id="container"]/div[2]/div[2]/a[2]')
            page += 1
        else:
            click_xpath('//*[@id="container"]/div[2]/div[2]/a[3]')
    # Parse the rendered list page: each article exposes title, preview, and timestamp.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    title_list = soup.select('#container > div.wrap.articles > article > a > h2')
    date_list = soup.select('#container > div.wrap.articles > article > a > time')
    content_list = soup.select('#container > div.wrap.articles > article > a > p')
    idx = 1
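    # Open each post for its full body (the list page only shows a preview),
    # then navigate back so the next article link can still be resolved.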
    for post in zip(title_list, content_list, date_list):
        click_xpath('//*[@id="container"]/div[2]/article[{}]'.format(idx))
        content = driver.find_element(By.XPATH, '//*[@id="container"]/div[2]/article/a/p').text
        sleeptime()
        driver.back()
        sleeptime()
        idx += 1
        post_df = pd.concat(
            [post_df, pd.DataFrame([[post[0].text, content]], columns=['title', 'content'])],
            ignore_index=True)
        # Timestamps are 'MM/DD HH:MM' strings, so lexicographic comparison
        # doubles as a chronological check within the same year.
        if post[2].text < yesterday:
            swt = False
            break
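
# Persist the scraped posts; utf-8-sig adds a BOM so Hangul opens cleanly in Excel.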
post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
# Legacy CSV writer kept for reference (expects a `posts` list of rows):
# BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# with open(os.path.join(BASE_DIR, 'data.csv'), 'w+', encoding='utf-8-sig', newline='') as file:
#     writer = csv.writer(file)
#     for idx in range(len(posts)):
#         writer.writerow(posts[idx])
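
# Note: the fixed random sleeps above could be swapped for explicit waits,
# which block only as long as needed. A minimal sketch (assumes the same
# XPath targets used by click_xpath above):
#
#   from selenium.webdriver.support.ui import WebDriverWait
#   from selenium.webdriver.support import expected_conditions as EC
#   WebDriverWait(driver, 10).until(
#       EC.presence_of_element_located((By.XPATH, xpath)))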