Showing
9 changed files
with
84 additions
and
0 deletions
.gitignore
0 → 100644
.idea/Todays_Issue.iml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8"?> | ||
2 | +<module type="PYTHON_MODULE" version="4"> | ||
3 | + <component name="NewModuleRootManager"> | ||
4 | + <content url="file://$MODULE_DIR$" /> | ||
5 | + <orderEntry type="jdk" jdkName="Remote Python 3.7.6 (sftp://root@163.180.146.131:25000/workspace/shared_nt/project01/venv/bin/python3)" jdkType="Python SDK" /> | ||
6 | + <orderEntry type="sourceFolder" forTests="false" /> | ||
7 | + </component> | ||
8 | +</module> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/deployment.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8"?> | ||
2 | +<project version="4"> | ||
3 | + <component name="PublishConfigData" autoUpload="Always" serverName="root@163.180.146.131:25000" remoteFilesAllowedToDisappearOnAutoupload="false"> | ||
4 | + <serverData> | ||
5 | + <paths name="root@163.180.146.131:25000"> | ||
6 | + <serverdata> | ||
7 | + <mappings> | ||
8 | + <mapping deploy="/tmp/pycharm_project_111" local="$PROJECT_DIR$" /> | ||
9 | + </mappings> | ||
10 | + </serverdata> | ||
11 | + </paths> | ||
12 | + </serverData> | ||
13 | + <option name="myAutoUpload" value="ALWAYS" /> | ||
14 | + </component> | ||
15 | +</project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/misc.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8"?> | ||
2 | +<project version="4"> | ||
3 | + <component name="ProjectRootManager" version="2" project-jdk-name="Remote Python 3.7.6 (sftp://root@163.180.146.131:25000/workspace/shared_nt/project01/venv/bin/python3)" project-jdk-type="Python SDK" /> | ||
4 | +</project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/modules.xml
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8"?> | ||
2 | +<project version="4"> | ||
3 | + <component name="ProjectModuleManager"> | ||
4 | + <modules> | ||
5 | + <module fileurl="file://$PROJECT_DIR$/.idea/Todays_Issue.iml" filepath="$PROJECT_DIR$/.idea/Todays_Issue.iml" /> | ||
6 | + </modules> | ||
7 | + </component> | ||
8 | +</project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
.idea/vcs.xml
0 → 100644
README.md
0 → 100644
File mode changed
content.py
0 → 100644
1 | +from selenium import webdriver | ||
2 | +from bs4 import BeautifulSoup | ||
3 | + | ||
def GetContent(url):
    """Fetch a page with headless Chrome and return its article text.

    The page at ``url`` is rendered by a headless Chrome instance, parsed
    with BeautifulSoup, and the direct children of the element matching
    ``#newsEndContents`` are scanned. Children whose string form is plain
    text are stripped and concatenated; children that render as markup are
    skipped. Single quotes are removed from the final result.

    Args:
        url: Address of the page to load.

    Returns:
        The concatenated text as a ``str``. An empty string is returned when
        the page has no ``#newsEndContents`` element.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('window-size=1920x1080')

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        html = driver.page_source
    finally:
        # Always shut the browser down; otherwise each call leaves a
        # Chrome process running even when the page load fails.
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')

    # Container selector noted as "naverNews" in the original code.
    content = soup.select_one('#newsEndContents')
    if content is None:
        # select_one yields None when nothing matches the selector.
        return ""

    pieces = []
    for para in content.contents:
        stripped = str(para).strip()
        # Skip whitespace-only nodes and anything whose string form starts
        # with "<" or "/" (i.e. nodes that render as markup, not text).
        if not stripped or stripped[0] in ("<", "/"):
            continue
        pieces.append(stripped)

    # str.replace returns a new string, so the result must be used.
    return "".join(pieces).replace("'", "")
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment