박은주

Set time zone to Asia/Seoul (KST, UTC+9)

......@@ -75,7 +75,6 @@ def GetData():
sleeptime()
yesterday = (today - timedelta(1)).strftime('%m/%d %H:%M')
print(yesterday)
swt = True
page = 0
......@@ -93,6 +92,7 @@ def GetData():
page += 1
else:
Click('//*[@id="container"]/div[2]/div[2]/a[3]', driver)
page += 1
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
......@@ -115,27 +115,18 @@ def GetData():
content = TextPreprocess(content)
post_df = post_df.append(pd.DataFrame([[title, content]],
columns=['title', 'content']))
print(post)
print(content)
print(title)
print(post[1].text)
if post[1].text <= yesterday:
swt = False
break
print('next page')
print('page : {}'.format(page))
post_df.to_csv('data.csv', mode='w', encoding='utf-8-sig', index=False)
print("CVS file saved")
# with open('data.json', 'w+', encoding='utf-8-sig') as json_file:
# for post in zip(post_df['title'].tolist(), post_df['content'].tolist()):
# json.dump({
# "document" :
# {
# "type" : "PLAIN_TEXT",
# "content" : post[0] + post[1]
# },
# "encodingType" : "UTF8"
# }, json_file, ensure_ascii=False)
# print("JSON file saved")
with open(BASE_DIR + 'date.txt', 'w', encoding='utf-8-sig') as txt_file:
txt_file.write((today - timedelta(1)).strftime("%Y년 %m월 %d일"))
with open('data.txt', 'w', encoding='utf-8-sig') as txt_file:
for post in zip(post_df['title'].tolist(), post_df['content'].tolist()):
......