Showing 1 changed file with 22 additions and 7 deletions
... | @@ -11,6 +11,8 @@ from selenium.common.exceptions import NoSuchElementException | ... | @@ -11,6 +11,8 @@ from selenium.common.exceptions import NoSuchElementException |
11 | from hanspell import spell_checker | 11 | from hanspell import spell_checker |
12 | from bs4 import BeautifulSoup | 12 | from bs4 import BeautifulSoup |
13 | from datetime import datetime, timedelta | 13 | from datetime import datetime, timedelta |
14 | +from pytz import timezone | ||
15 | +# from pyvirtualdisplay import Display | ||
14 | 16 | ||
15 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) | 17 | BASE_DIR = os.path.dirname(os.path.realpath(__file__)) |
16 | 18 | ||
... | @@ -32,6 +34,9 @@ def TextPreprocess(text): | ... | @@ -32,6 +34,9 @@ def TextPreprocess(text): |
32 | return text | 34 | return text |
33 | 35 | ||
34 | def GetData(): | 36 | def GetData(): |
37 | + # display = Display(visible=0, size=(1920,1080)) | ||
38 | + # display.start() | ||
39 | + | ||
35 | login_info = { | 40 | login_info = { |
36 | 'userID' : 'qdw0313', | 41 | 'userID' : 'qdw0313', |
37 | 'userpw' : 'fejUfrQxHWwtcGcP0' | 42 | 'userpw' : 'fejUfrQxHWwtcGcP0' |
... | @@ -47,7 +52,10 @@ def GetData(): | ... | @@ -47,7 +52,10 @@ def GetData(): |
47 | options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47') | 52 | options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47') |
48 | 53 | ||
49 | # driver = webdriver.Chrome(r'C:\Users\E_N__\Desktop\chromedriver.exe', options=options) | 54 | # driver = webdriver.Chrome(r'C:\Users\E_N__\Desktop\chromedriver.exe', options=options) |
50 | - driver = webdriver.Chrome(BASE_DIR + '/chromedriver.exe', options=options) | 55 | + # driver = webdriver.Chrome(executable_path=BASE_DIR + '/chromedriver.exe', options=options) |
56 | + driver = webdriver.Chrome(options=options) | ||
57 | + utc_patam = {'timezoneId': 'Asia/Seoul'} | ||
58 | + driver.execute_cdp_cmd('Emulation.setTimezoneOverride', utc_patam) | ||
51 | 59 | ||
52 | driver.get('about:blank') | 60 | driver.get('about:blank') |
53 | driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});") | 61 | driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});") |
... | @@ -61,8 +69,12 @@ def GetData(): | ... | @@ -61,8 +69,12 @@ def GetData(): |
61 | sleeptime() | 69 | sleeptime() |
62 | 70 | ||
63 | # 국제캠 자게 | 71 | # 국제캠 자게 |
72 | + KST = timezone('Asia/Seoul') | ||
73 | + today = datetime.utcnow().astimezone(KST) | ||
74 | + # today = datetime.today() | ||
75 | + | ||
64 | sleeptime() | 76 | sleeptime() |
65 | - yesterday = (datetime.today() - timedelta(1)).strftime('%m/%d %H:%M') | 77 | + yesterday = (today - timedelta(1)).strftime('%m/%d %H:%M') |
66 | print(yesterday) | 78 | print(yesterday) |
67 | swt = True | 79 | swt = True |
68 | page = 0 | 80 | page = 0 |
... | @@ -85,8 +97,8 @@ def GetData(): | ... | @@ -85,8 +97,8 @@ def GetData(): |
85 | html = driver.page_source | 97 | html = driver.page_source |
86 | soup = BeautifulSoup(html, 'html.parser') | 98 | soup = BeautifulSoup(html, 'html.parser') |
87 | 99 | ||
88 | - TitleList = soup.select('#container > div.wrap.articles > article > a > h2') | ||
89 | DateList = soup.select('#container > div.wrap.articles > article > a > time') | 100 | DateList = soup.select('#container > div.wrap.articles > article > a > time') |
101 | + TitleList = soup.select('#container > div.wrap.articles > article > a > h2') | ||
90 | 102 | ||
91 | for post in zip(TitleList, DateList): | 103 | for post in zip(TitleList, DateList): |
92 | title = TextPreprocess(post[0].text) | 104 | title = TextPreprocess(post[0].text) |
... | @@ -103,13 +115,16 @@ def GetData(): | ... | @@ -103,13 +115,16 @@ def GetData(): |
103 | content = TextPreprocess(content) | 115 | content = TextPreprocess(content) |
104 | post_df = post_df.append(pd.DataFrame([[title, content]], | 116 | post_df = post_df.append(pd.DataFrame([[title, content]], |
105 | columns=['title', 'content'])) | 117 | columns=['title', 'content'])) |
118 | + print(post) | ||
119 | + print(content) | ||
106 | if post[1].text <= yesterday: | 120 | if post[1].text <= yesterday: |
121 | + swt = False | ||
107 | break | 122 | break |
108 | - break | 123 | + print('next page') |
124 | + | ||
125 | + post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False) | ||
126 | + print("CVS file saved") | ||
109 | 127 | ||
110 | - # post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False) | ||
111 | - # print("CVS file saved") | ||
112 | - # | ||
113 | # with open('data.json', 'w+', encoding='utf-8-sig') as json_file: | 128 | # with open('data.json', 'w+', encoding='utf-8-sig') as json_file: |
114 | # for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): | 129 | # for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): |
115 | # json.dump({ | 130 | # json.dump({ | ... | ... |
-
Please register or log in to post a comment