Ubuntu

Add UTC-to-KST timezone setting (Asia/Seoul)

......@@ -11,6 +11,8 @@ from selenium.common.exceptions import NoSuchElementException
from hanspell import spell_checker
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from pytz import timezone
# from pyvirtualdisplay import Display
BASE_DIR = os.path.dirname(os.path.realpath(__file__))
......@@ -32,6 +34,9 @@ def TextPreprocess(text):
return text
def GetData():
# display = Display(visible=0, size=(1920,1080))
# display.start()
login_info = {
'userID' : 'qdw0313',
'userpw' : 'fejUfrQxHWwtcGcP0'
......@@ -47,7 +52,10 @@ def GetData():
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47')
# driver = webdriver.Chrome(r'C:\Users\E_N__\Desktop\chromedriver.exe', options=options)
driver = webdriver.Chrome(BASE_DIR + '/chromedriver.exe', options=options)
# driver = webdriver.Chrome(executable_path=BASE_DIR + '/chromedriver.exe', options=options)
driver = webdriver.Chrome(options=options)
utc_patam = {'timezoneId': 'Asia/Seoul'}
driver.execute_cdp_cmd('Emulation.setTimezoneOverride', utc_patam)
driver.get('about:blank')
driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});")
......@@ -61,8 +69,12 @@ def GetData():
sleeptime()
# International campus free board (국제캠 자유게시판)
KST = timezone('Asia/Seoul')
today = datetime.utcnow().astimezone(KST)
# today = datetime.today()
sleeptime()
yesterday = (datetime.today() - timedelta(1)).strftime('%m/%d %H:%M')
yesterday = (today - timedelta(1)).strftime('%m/%d %H:%M')
print(yesterday)
swt = True
page = 0
......@@ -85,8 +97,8 @@ def GetData():
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
DateList = soup.select('#container > div.wrap.articles > article > a > time')
TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
for post in zip(TitleList, DateList):
title = TextPreprocess(post[0].text)
......@@ -103,13 +115,16 @@ def GetData():
content = TextPreprocess(content)
post_df = post_df.append(pd.DataFrame([[title, content]],
columns=['title', 'content']))
print(post)
print(content)
if post[1].text <= yesterday:
swt = False
break
break
print('next page')
# post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
# print("CVS file saved")
#
post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
print("CVS file saved")
# with open('data.json', 'w+', encoding='utf-8-sig') as json_file:
# for post in zip(post_df['title'].tolist(), post_df['content'].tolist()):
# json.dump({
......