박은주

Add txt type output

Showing 1 changed file with 20 additions and 15 deletions
......@@ -34,8 +34,8 @@ def TextPreprocess(text):
def GetData():
login_info = {
'userID' : '**********',
'userpw' : '**********'
'userID' : '<REDACTED>',
'userpw' : '<REDACTED>'
}
options = webdriver.ChromeOptions()
......@@ -108,19 +108,24 @@ def GetData():
break
break
post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
print("CVS file saved")
with open('data.json', 'w+', encoding='utf-8-sig') as json_file:
# post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
# print("CVS file saved")
#
# with open('data.json', 'w+', encoding='utf-8-sig') as json_file:
# for post in zip(post_df['title'].tolist(), post_df['content'].tolist()):
# json.dump({
# "document" :
# {
# "type" : "PLAIN_TEXT",
# "content" : post[0] + post[1]
# },
# "encodingType" : "UTF8"
# }, json_file, ensure_ascii=False)
# print("JSON file saved")
with open('data.txt', 'w', encoding='utf-8-sig') as txt_file:
for post in zip(post_df['title'].tolist(), post_df['content'].tolist()):
json.dump({
"document" :
{
"type" : "PLAIN_TEXT",
"content" : post[0] + post[1]
},
"encodingType" : "UTF8"
}, json_file, ensure_ascii=False)
print("JSON file saved")
txt_file.write(post[0] + post[1] + '.\n')
print("txt file saved")
GetData()
\ No newline at end of file
......