박은주

delete folder

......@@ -11,3 +11,6 @@
chromedriver.exe
/venv/
/.idea/
*.iml
*.csv
*.xml
\ No newline at end of file
......
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Remote Python 3.7.6 (sftp://root@163.180.146.131:25000/workspace/shared_nt/project01/venv/bin/python3)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" autoUpload="Always" serverName="root@163.180.146.131:25000" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="root@163.180.146.131:25000">
<serverdata>
<mappings>
<mapping deploy="/tmp/pycharm_project_111" local="$PROJECT_DIR$" />
</mappings>
</serverdata>
</paths>
</serverData>
<option name="myAutoUpload" value="ALWAYS" />
</component>
</project>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.6 (sftp://root@163.180.146.131:25000/workspace/shared_nt/project01/venv/bin/python3)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Todays_Issue.iml" filepath="$PROJECT_DIR$/.idea/Todays_Issue.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
import csv
import time
import random
import os
from selenium import webdriver
from bs4 import BeautifulSoup
import random
from datetime import datetime, timedelta\
def sleeptime():
rand = random.uniform(1,3)
......@@ -10,15 +13,15 @@ def sleeptime():
# Login credentials used to fill the sign-in form.
# Only placeholders are kept in the source: real account names and passwords
# must never be committed. Replace 'id' / 'pw' locally before running.
login_info = {
    'userID': 'id',
    'userpw': 'pw',
}
# Build the Chrome options object; every switch below is passed to
# add_argument() in the listed order.
options = webdriver.ChromeOptions()
chrome_switches = (
    'headless',
    'no-sandbox',
    'window-size=1920x1080',
    'disable-gpu',
    'disable-dev-shm-usage',
    'lang=ko_KR',
    'user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47',
)
for switch in chrome_switches:
    options.add_argument(switch)
......@@ -36,20 +39,46 @@ driver.find_element_by_name('password').send_keys(login_info['userpw'])
# Click the element with class 'submit' (the credentials are typed into the
# form just before this point), then pause via sleeptime().
# NOTE(review): sleeptime() is defined earlier in the file; it draws a random
# value between 1 and 3 and presumably sleeps for that long - confirm.
driver.find_element_by_class_name('submit').click()
sleeptime()
# International-campus free board
driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click()
sleeptime()
# Collect [title, content] pairs from the board, page by page, until a post
# older than 24 hours is reached.
#
# The previous stop condition required a post whose timestamp was *equal* to
# "now - 24h" down to the minute; when no such post existed the loop kept
# paging forever. The scan now stops at the first post that is older than the
# cutoff, and also stops when a page yields no articles at all.
posts = []
now = datetime.today()
# Board timestamps have minute resolution, so compare against a cutoff that is
# truncated to the minute; a post at exactly the cutoff minute is still kept.
cutoff = (now - timedelta(1)).replace(second=0, microsecond=0)
yesterday = cutoff.strftime('%m/%d %H:%M')


def _post_time(text, reference):
    """Parse a 'MM/DD HH:MM' timestamp into a datetime.

    The text carries no year, so the year of *reference* is assumed; a result
    that would lie after *reference* is moved back one year so that scans
    around New Year still order correctly.

    Returns None when *text* is not in the expected format.
    """
    try:
        stamp = datetime.strptime(
            '{}/{}'.format(reference.year, text.strip()), '%Y/%m/%d %H:%M')
    except ValueError:
        return None
    if stamp > reference:
        try:
            stamp = stamp.replace(year=stamp.year - 1)
        except ValueError:
            # Feb 29 has no counterpart in the previous year.
            return None
    return stamp


swt = True
page = 1
while swt:
    if not posts:
        # Nothing collected yet: open the board itself.
        driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click()
    elif page == 1:
        # On the first page the pagination bar has a single link.
        driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a').click()
        page += 1
    elif page == 2:
        # Clicked through JavaScript rather than a native click.
        element = driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a[2]')
        driver.execute_script("arguments[0].click();", element)
        sleeptime()
        page += 1
    else:
        # From the third page on, the link position stays the same, so the
        # page counter no longer needs to advance.
        element = driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a[3]')
        driver.execute_script("arguments[0].click();", element)
        sleeptime()
    sleeptime()

    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
    ContentList = soup.select('#container > div.wrap.articles > article > a > p')
    DateList = soup.select('#container > div.wrap.articles > article > a > time')

    rows = list(zip(TitleList, ContentList, DateList))
    if not rows:
        # No articles on this page (end of board, or the page failed to
        # load): stop instead of clicking the same link again forever.
        break

    for title, content, date in rows:
        posted = _post_time(date.text, now)
        # Timestamps that cannot be parsed are treated as recent and kept.
        # NOTE(review): assumes posts are listed newest first - confirm that
        # the board has no pinned older posts at the top.
        if posted is not None and posted < cutoff:
            swt = False
            break
        posts.append([title.text, content.text])
# Re-read the page currently loaded in the browser and print the child nodes
# of every article heading.
html = driver.page_source
soup = BeautifulSoup(html, 'html.parser')
TitleList = soup.select('#container > div.wrap.articles > article > a > h2')
# Iterating a heading yields its child nodes, which is exactly what extending
# a list with the heading consumes.
data = [node for heading in TitleList for node in heading]
print(data)
\ No newline at end of file
# Write every collected row to data.csv, located next to this script.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
csv_path = os.path.join(BASE_DIR + '/', 'data.csv')
with open(csv_path, 'w+', encoding='utf-8-sig', newline='') as file:
    # One CSV row per collected post, in collection order.
    csv.writer(file).writerows(posts)
\ No newline at end of file
......