Showing 1 changed file with 2 additions and 13 deletions
... | @@ -34,8 +34,8 @@ def TextPreprocess(text): | ... | @@ -34,8 +34,8 @@ def TextPreprocess(text): |
34 | 34 | ||
35 | def GetData(): | 35 | def GetData(): |
36 | login_info = { | 36 | login_info = { |
37 | - 'userID' : 'qdw0313', | 37 | + 'userID' : '**********', |
38 | - 'userpw' : 'fejUfrQxHWwtcGcP0' | 38 | + 'userpw' : '**********' |
39 | } | 39 | } |
40 | 40 | ||
41 | options = webdriver.ChromeOptions() | 41 | options = webdriver.ChromeOptions() |
... | @@ -101,23 +101,15 @@ def GetData(): | ... | @@ -101,23 +101,15 @@ def GetData(): |
101 | sleeptime() | 101 | sleeptime() |
102 | 102 | ||
103 | if not (post_df['title'] == title).any(): | 103 | if not (post_df['title'] == title).any(): |
104 | - # Click('//*[@id="container"]/div[2]/article[{}]'.format(idx)) | ||
105 | content = TextPreprocess(content) | 104 | content = TextPreprocess(content) |
106 | post_df = post_df.append(pd.DataFrame([[title, content]], | 105 | post_df = post_df.append(pd.DataFrame([[title, content]], |
107 | columns=['title', 'content'])) | 106 | columns=['title', 'content'])) |
108 | - # print("{0}. {1} : {2}".format(idx, title, content)) | ||
109 | if post[1].text <= yesterday: | 107 | if post[1].text <= yesterday: |
110 | break | 108 | break |
111 | break | 109 | break |
112 | 110 | ||
113 | post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False) | 111 | post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False) |
114 | print("CVS file saved") | 112 | print("CVS file saved") |
115 | - # print(post_df) | ||
116 | - # exit() | ||
117 | - # post_df.reset_index(drop=True, inplace=True) | ||
118 | - # post_df.to_json('data.json') | ||
119 | - # # with open('data.json', 'w', encoding='utf-8-sig') as file: | ||
120 | - # # post_df.to_json(file, force_ascii=False) | ||
121 | 113 | ||
122 | with open('data.json', 'w+', encoding='utf-8-sig') as json_file: | 114 | with open('data.json', 'w+', encoding='utf-8-sig') as json_file: |
123 | for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): | 115 | for post in zip(post_df['title'].tolist(), post_df['content'].tolist()): |
... | @@ -132,6 +124,3 @@ def GetData(): | ... | @@ -132,6 +124,3 @@ def GetData(): |
132 | print("JSON file saved") | 124 | print("JSON file saved") |
133 | 125 | ||
134 | GetData() | 126 | GetData() |
... | \ No newline at end of file | ... | \ No newline at end of file |
135 | -######## TODO: JSON으로 저장 | ||
136 | -######## 형식 : { "document" : { "type" : "PLAIN_TEXT", "content" : "~~" }, "encodingType" : "UTF8" } | ||
137 | -######## GOOGLE Sentiment Analyzer 사용을 위해 | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment