Showing 1 changed file with 6 additions and 15 deletions
... | @@ -75,7 +75,6 @@ def GetData(): | ... | @@ -75,7 +75,6 @@ def GetData(): |
75 | 75 | ||
76 | sleeptime() | 76 | sleeptime() |
77 | yesterday = (today - timedelta(1)).strftime('%m/%d %H:%M') | 77 | yesterday = (today - timedelta(1)).strftime('%m/%d %H:%M') |
78 | - print(yesterday) | ||
79 | swt = True | 78 | swt = True |
80 | page = 0 | 79 | page = 0 |
81 | 80 | ||
... | @@ -93,6 +92,7 @@ def GetData(): | ... | @@ -93,6 +92,7 @@ def GetData(): |
93 | page += 1 | 92 | page += 1 |
94 | else: | 93 | else: |
95 | Click('//*[@id="container"]/div[2]/div[2]/a[3]', driver) | 94 | Click('//*[@id="container"]/div[2]/div[2]/a[3]', driver) |
95 | + page += 1 | ||
96 | 96 | ||
97 | html = driver.page_source | 97 | html = driver.page_source |
98 | soup = BeautifulSoup(html, 'html.parser') | 98 | soup = BeautifulSoup(html, 'html.parser') |
... | @@ -115,27 +115,18 @@ def GetData(): | ... | @@ -115,27 +115,18 @@ def GetData(): |
115 | content = TextPreprocess(content) | 115 | content = TextPreprocess(content) |
116 | post_df = post_df.append(pd.DataFrame([[title, content]], | 116 | post_df = post_df.append(pd.DataFrame([[title, content]], |
117 | columns=['title', 'content'])) | 117 | columns=['title', 'content'])) |
118 | - print(post) | 118 | + print(title) |
119 | - print(content) | 119 | + print(post[1].text) |
120 | if post[1].text <= yesterday: | 120 | if post[1].text <= yesterday: |
121 | swt = False | 121 | swt = False |
122 | break | 122 | break |
123 | - print('next page') | 123 | + print('page : {}'.format(page)) |
124 | 124 | ||
125 | post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False) | 125 | post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False) |
126 | print("CVS file saved") | 126 | print("CVS file saved") |
127 | 127 | ||
128 | - # with open('data.json', 'w+', encoding='utf-8-sig') as json_file: | 128 | + with open(BASE_DIR + 'date.txt', 'w', encoding='utf-8-sig') as txt_file: |
129 | - # for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): | 129 | + txt_file.write((today - timedelta(1)).strftime("%Y년 %m월 %d일")) |
130 | - # json.dump({ | ||
131 | - # "document" : | ||
132 | - # { | ||
133 | - # "type" : "PLAIN_TEXT", | ||
134 | - # "content" : post[0] + post[1] | ||
135 | - # }, | ||
136 | - # "encodingType" : "UTF8" | ||
137 | - # }, json_file, ensure_ascii=False) | ||
138 | - # print("JSON file saved") | ||
139 | 130 | ||
140 | with open('data.txt', 'w', encoding='utf-8-sig') as txt_file: | 131 | with open('data.txt', 'w', encoding='utf-8-sig') as txt_file: |
141 | for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): | 132 | for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): | ... | ... |
-
Please register or login to post a comment