Showing 8 changed files with 51 additions and 66 deletions
.idea/Todays_Issue.iml
deleted
100644 → 0
1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
2 | -<module type="PYTHON_MODULE" version="4"> | ||
3 | - <component name="NewModuleRootManager"> | ||
4 | - <content url="file://$MODULE_DIR$" /> | ||
5 | - <orderEntry type="jdk" jdkName="Remote Python 3.7.6 (sftp://root@163.180.146.131:25000/workspace/shared_nt/project01/venv/bin/python3)" jdkType="Python SDK" /> | ||
6 | - <orderEntry type="sourceFolder" forTests="false" /> | ||
7 | - </component> | ||
8 | -</module> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/deployment.xml
deleted
100644 → 0
1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
2 | -<project version="4"> | ||
3 | - <component name="PublishConfigData" autoUpload="Always" serverName="root@163.180.146.131:25000" remoteFilesAllowedToDisappearOnAutoupload="false"> | ||
4 | - <serverData> | ||
5 | - <paths name="root@163.180.146.131:25000"> | ||
6 | - <serverdata> | ||
7 | - <mappings> | ||
8 | - <mapping deploy="/tmp/pycharm_project_111" local="$PROJECT_DIR$" /> | ||
9 | - </mappings> | ||
10 | - </serverdata> | ||
11 | - </paths> | ||
12 | - </serverData> | ||
13 | - <option name="myAutoUpload" value="ALWAYS" /> | ||
14 | - </component> | ||
15 | -</project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/misc.xml
deleted
100644 → 0
1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
2 | -<project version="4"> | ||
3 | - <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.6 (sftp://root@163.180.146.131:25000/workspace/shared_nt/project01/venv/bin/python3)" project-jdk-type="Python SDK" /> | ||
4 | -</project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/modules.xml
deleted
100644 → 0
1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
2 | -<project version="4"> | ||
3 | - <component name="ProjectModuleManager"> | ||
4 | - <modules> | ||
5 | - <module fileurl="file://$PROJECT_DIR$/.idea/Todays_Issue.iml" filepath="$PROJECT_DIR$/.idea/Todays_Issue.iml" /> | ||
6 | - </modules> | ||
7 | - </component> | ||
8 | -</project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/vcs.xml
deleted
100644 → 0
1 | +import csv | ||
1 | import time | 2 | import time |
3 | +import random | ||
4 | +import os | ||
2 | 5 | ||
3 | from selenium import webdriver | 6 | from selenium import webdriver |
4 | from bs4 import BeautifulSoup | 7 | from bs4 import BeautifulSoup |
5 | -import random | 8 | +from datetime import datetime, timedelta\ |
6 | 9 | ||
7 | def sleeptime(): | 10 | def sleeptime(): |
8 | rand = random.uniform(1,3) | 11 | rand = random.uniform(1,3) |
... | @@ -10,15 +13,15 @@ def sleeptime(): | ... | @@ -10,15 +13,15 @@ def sleeptime(): |
10 | 13 | ||
11 | 14 | ||
12 | login_info = { | 15 | login_info = { |
13 | - 'userID' : 'qdw0313', | 16 | + 'userID' : 'id', |
14 | - 'userpw' : 'Uvjbt7G8dsnR6UN' | 17 | + 'userpw' : 'pw' |
15 | } | 18 | } |
16 | 19 | ||
17 | options = webdriver.ChromeOptions() | 20 | options = webdriver.ChromeOptions() |
18 | options.add_argument('headless') | 21 | options.add_argument('headless') |
19 | options.add_argument('no-sandbox') | 22 | options.add_argument('no-sandbox') |
20 | options.add_argument('window-size=1920x1080') | 23 | options.add_argument('window-size=1920x1080') |
21 | -# options.add_argument('disable-gpu') | 24 | +options.add_argument('disable-gpu') |
22 | options.add_argument('disable-dev-shm-usage') | 25 | options.add_argument('disable-dev-shm-usage') |
23 | options.add_argument('lang=ko_KR') | 26 | options.add_argument('lang=ko_KR') |
24 | options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47') | 27 | options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.47') |
... | @@ -36,20 +39,46 @@ driver.find_element_by_name('password').send_keys(login_info['userpw']) | ... | @@ -36,20 +39,46 @@ driver.find_element_by_name('password').send_keys(login_info['userpw']) |
36 | driver.find_element_by_class_name('submit').click() | 39 | driver.find_element_by_class_name('submit').click() |
37 | sleeptime() | 40 | sleeptime() |
38 | 41 | ||
39 | - | ||
40 | # 국제캠 자게 | 42 | # 국제캠 자게 |
41 | -driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click() | ||
42 | sleeptime() | 43 | sleeptime() |
44 | +posts = [] | ||
45 | +yesterday = (datetime.today() - timedelta(1)).strftime('%m/%d %H:%M') | ||
46 | +swt = True | ||
47 | +page = 1 | ||
48 | + | ||
49 | +while swt: | ||
50 | + if not posts: | ||
51 | + driver.find_element_by_xpath('//*[@id="submenu"]/div/div[2]/ul/li[1]/a').click() | ||
52 | + else: | ||
53 | + if page == 1: | ||
54 | + driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a').click() | ||
55 | + page += 1 | ||
56 | + elif page == 2: | ||
57 | + element = driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a[2]') | ||
58 | + driver.execute_script("arguments[0].click();", element) | ||
59 | + sleeptime() | ||
60 | + page += 1 | ||
61 | + else: | ||
62 | + element = driver.find_element_by_xpath('//*[@id="container"]/div[2]/div[2]/a[3]') | ||
63 | + driver.execute_script("arguments[0].click();", element) | ||
64 | + sleeptime() | ||
65 | + | ||
66 | + sleeptime() | ||
67 | + html = driver.page_source | ||
68 | + soup = BeautifulSoup(html, 'html.parser') | ||
69 | + | ||
70 | + TitleList = soup.select('#container > div.wrap.articles > article > a > h2') | ||
71 | + ContentList = soup.select('#container > div.wrap.articles > article > a > p') | ||
72 | + DateList = soup.select('#container > div.wrap.articles > article > a > time') | ||
73 | + | ||
74 | + for post in zip(TitleList, ContentList, DateList): | ||
75 | + posts.append([post[0].text, post[1].text]) | ||
76 | + if post[2].text == yesterday: | ||
77 | + swt = False | ||
78 | + break | ||
43 | 79 | ||
44 | -# driver.find_element_by_css_selector('li.none').click() | ||
45 | -html = driver.page_source | ||
46 | -soup = BeautifulSoup(html, 'html.parser') | ||
47 | -#container > div.wrap.articles > article:nth-child(2) > a > h2 | ||
48 | -#container > div.wrap.articles > article:nth-child(3) > a > h2 | ||
49 | -#find_all | ||
50 | -TitleList = soup.select('#container > div.wrap.articles > article > a > h2') | ||
51 | - | ||
52 | -data = [] | ||
53 | -for title in TitleList: | ||
54 | - data += title | ||
55 | -print(data) | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
80 | +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
81 | +with open(os.path.join(BASE_DIR + '/', 'data.csv'), 'w+', encoding='utf-8-sig', newline='') as file: | ||
82 | + writer = csv.writer(file) | ||
83 | + for idx in range(len(posts)): | ||
84 | + writer.writerow(posts[idx]) | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment