Ubuntu

Add UTC setting (use the Asia/Seoul timezone for the browser and for the date cutoff)

...@@ -11,6 +11,8 @@ from selenium.common.exceptions import NoSuchElementException ...@@ -11,6 +11,8 @@ from selenium.common.exceptions import NoSuchElementException
11 from hanspell import spell_checker 11 from hanspell import spell_checker
12 from bs4 import BeautifulSoup 12 from bs4 import BeautifulSoup
13 from datetime import datetime, timedelta 13 from datetime import datetime, timedelta
14 +from pytz import timezone
15 +# from pyvirtualdisplay import Display
14 16
15 BASE_DIR = os.path.dirname(os.path.realpath(__file__)) 17 BASE_DIR = os.path.dirname(os.path.realpath(__file__))
16 18
...@@ -32,6 +34,9 @@ def TextPreprocess(text): ...@@ -32,6 +34,9 @@ def TextPreprocess(text):
32 return text 34 return text
33 35
34 def GetData(): 36 def GetData():
37 + # display = Display(visible=0, size=(1920,1080))
38 + # display.start()
39 +
35 login_info = { 40 login_info = {
36 'userID' : 'qdw0313', 41 'userID' : 'qdw0313',
37 'userpw' : 'fejUfrQxHWwtcGcP0' 42 'userpw' : 'fejUfrQxHWwtcGcP0'
...@@ -47,7 +52,10 @@ def GetData(): ...@@ -47,7 +52,10 @@ def GetData():
47 options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47') 52 options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47')
48 53
49 # driver = webdriver.Chrome(r'C:\Users\E_N__\Desktop\chromedriver.exe', options=options) 54 # driver = webdriver.Chrome(r'C:\Users\E_N__\Desktop\chromedriver.exe', options=options)
50 - driver = webdriver.Chrome(BASE_DIR + '/chromedriver.exe', options=options) 55 + # driver = webdriver.Chrome(executable_path=BASE_DIR + '/chromedriver.exe', options=options)
56 + driver = webdriver.Chrome(options=options)
57 + utc_patam = {'timezoneId': 'Asia/Seoul'}
58 + driver.execute_cdp_cmd('Emulation.setTimezoneOverride', utc_patam)
51 59
52 driver.get('about:blank') 60 driver.get('about:blank')
53 driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});") 61 driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});")
...@@ -61,8 +69,12 @@ def GetData(): ...@@ -61,8 +69,12 @@ def GetData():
61 sleeptime() 69 sleeptime()
62 70
63 # 국제캠 자게 71 # 국제캠 자게
72 + KST = timezone('Asia/Seoul')
73 + today = datetime.utcnow().astimezone(KST)
74 + # today = datetime.today()
75 +
64 sleeptime() 76 sleeptime()
65 - yesterday = (datetime.today() - timedelta(1)).strftime('%m/%d %H:%M') 77 + yesterday = (today - timedelta(1)).strftime('%m/%d %H:%M')
66 print(yesterday) 78 print(yesterday)
67 swt = True 79 swt = True
68 page = 0 80 page = 0
...@@ -85,8 +97,8 @@ def GetData(): ...@@ -85,8 +97,8 @@ def GetData():
85 html = driver.page_source 97 html = driver.page_source
86 soup = BeautifulSoup(html, 'html.parser') 98 soup = BeautifulSoup(html, 'html.parser')
87 99
88 - TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
89 DateList = soup.select('#container > div.wrap.articles > article > a > time') 100 DateList = soup.select('#container > div.wrap.articles > article > a > time')
101 + TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
90 102
91 for post in zip(TitleList, DateList): 103 for post in zip(TitleList, DateList):
92 title = TextPreprocess(post[0].text) 104 title = TextPreprocess(post[0].text)
...@@ -103,13 +115,16 @@ def GetData(): ...@@ -103,13 +115,16 @@ def GetData():
103 content = TextPreprocess(content) 115 content = TextPreprocess(content)
104 post_df = post_df.append(pd.DataFrame([[title, content]], 116 post_df = post_df.append(pd.DataFrame([[title, content]],
105 columns=['title', 'content'])) 117 columns=['title', 'content']))
118 + print(post)
119 + print(content)
106 if post[1].text <= yesterday: 120 if post[1].text <= yesterday:
121 + swt = False
107 break 122 break
108 - break 123 + print('next page')
124 +
125 + post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
126 + print("CVS file saved")
109 127
110 - # post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
111 - # print("CVS file saved")
112 - #
113 # with open('data.json', 'w+', encoding='utf-8-sig') as json_file: 128 # with open('data.json', 'w+', encoding='utf-8-sig') as json_file:
114 # for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): 129 # for post in zip(post_df['title'].tolist(), post_df['content'].tolist()):
115 # json.dump({ 130 # json.dump({
......