김승훈

crawling info

This diff is collapsed. Click to expand it.
1 +{
2 + "name": "cheerio",
3 + "version": "1.0.0",
4 + "description": "",
5 + "main": "practice.js",
6 + "scripts": {
7 + "test": "echo \"Error: no test specified\" && exit 1"
8 + },
9 + "author": "",
10 + "license": "ISC",
11 + "dependencies": {
12 + "selenium-webdriver": "^4.0.0-alpha.1",
13 + "webdriverio": "^4.14.0"
14 + }
15 +}
/**
 * Crawls a paginated list on a Naver people-search result page.
 *
 * Flow: open `url`, locate the "next" button inside #pagination_76, then
 * repeat (while cnt < PAGE_LIMIT): print the text of every <li> inside
 * #listUI_76, click "next", and pause briefly before the next pass.
 * The browser session is closed when the run finishes or fails.
 */
const async = require('async');
const webdriver = require('selenium-webdriver');
// NOTE(review): `options` is not referenced anywhere in the visible script.
const options = {desiredCapabilities: {browserName: 'chrome'}};
const {Builder, By, Key, until} = require('selenium-webdriver');

const driver = new Builder()
  .withCapabilities(webdriver.Capabilities.chrome())
  .build();

const url = 'https://people.search.naver.com/search.naver?where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&sm=tab_etc&ie=utf8&key=PeopleService&os=94702';

// The loop runs while cnt < PAGE_LIMIT with cnt starting at 1, i.e. 4 passes.
const PAGE_LIMIT = 5;
// Pause (ms) after clicking "next" before the list is read again.
const PAGE_SETTLE_MS = 50;

/**
 * Resolves after `time` milliseconds.
 *
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(time) {
  return new Promise((resolve) => setTimeout(resolve, time));
}

/**
 * Builds a rejection handler that logs the error under `label` and re-throws
 * it, so the failing step is reported once and the chain still stops.
 *
 * @param {string} label - Prefix printed before the error.
 * @returns {function(*): never}
 */
function logAndRethrow(label) {
  return (error) => {
    console.log(label, error);
    throw error;
  };
}

/**
 * Reads the text of one list item. A failure on a single item is logged and
 * mapped to `null` so the remaining items are still printed.
 *
 * @param {webdriver.WebElement} item
 * @returns {Promise<?string>}
 */
function readItemText(item) {
  return item.getText().catch((error) => {
    console.log('really?? exception!', error);
    return null;
  });
}

/**
 * Prints the text of every <li> under `contentsUI`, then clicks `nextButton`
 * and waits PAGE_SETTLE_MS.
 *
 * All texts are read BEFORE the click: clicking first replaces the list, and
 * the element handles collected for the old page then fail with stale-element
 * errors.
 *
 * @param {webdriver.WebElement} contentsUI - The list container.
 * @param {webdriver.WebElement} nextButton - The "next page" button.
 * @returns {Promise<void>}
 */
function printItemsThenAdvance(contentsUI, nextButton) {
  return contentsUI
    .findElements(By.tagName('li'))
    .then((items) => Promise.all(items.map(readItemText)))
    .then((texts) => {
      // Log in document order, skipping items whose read failed.
      texts.forEach((text) => {
        if (text !== null) {
          console.log(text);
        }
      });
    })
    .then(() => nextButton.click())
    .then(() => sleep(PAGE_SETTLE_MS));
}

/**
 * Finds the "next" buttons inside the pagination control.
 *
 * Per the original author's notes, #pagination_76 is the pagination control of
 * the broadcast-content section and it holds two `.bt_next` elements: the
 * regular one and one used when there is no further page.
 *
 * @returns {Promise<webdriver.WebElement[]>}
 */
function findNextButtons() {
  return driver
    .findElement(By.id('pagination_76'))
    .catch(logAndRethrow('Exception 555'))
    .then((pagination) =>
      pagination
        .findElements(By.className('bt_next'))
        .catch(logAndRethrow('Exception 4444'))
    );
}

/**
 * Runs the print-and-advance loop with `async.whilst`.
 *
 * @param {webdriver.WebElement[]} nextButtons - Result of findNextButtons().
 * @returns {Promise<void>} Settles when the loop ends; rejects on the first
 *     failed pass (already logged by the step that failed).
 */
function crawlPages(nextButtons) {
  if (nextButtons.length === 0) {
    const missing = new Error('no .bt_next element found inside #pagination_76');
    console.log('Exception 4444', missing);
    return Promise.reject(missing);
  }
  const nextButton = nextButtons[0];

  return new Promise((resolve, reject) => {
    let cnt = 1;

    // async v3 hands the test a node-style callback; async v2 expects a plain
    // boolean return value. Support both, since the version is not pinned.
    function hasMorePages(done) {
      const more = cnt < PAGE_LIMIT;
      console.log('whilist result', more);
      if (typeof done === 'function') {
        return done(null, more);
      }
      return more;
    }

    function getContentsAndClickNext(callback) {
      console.log('higetcontests', cnt);
      cnt++;
      driver
        .findElement(By.id('listUI_76'))
        .catch(logAndRethrow('fuck!@#'))
        .then((contentsUI) =>
          printItemsThenAdvance(contentsUI, nextButton)
            .catch(logAndRethrow('asdfasdf!!'))
        )
        // Always signal the loop, on success and on failure, so that
        // async.whilst can never hang waiting for a callback.
        .then(() => callback(), (error) => callback(error));
    }

    async.whilst(hasMorePages, getContentsAndClickNext, (e) => {
      if (e) {
        reject(e);
      } else {
        resolve();
      }
    });
  });
}

/**
 * Closes the browser session. Used for both the success and the failure path;
 * failures were already logged by the step that raised them.
 *
 * @returns {Promise<void>}
 */
function shutdown() {
  return driver.quit().catch((error) => {
    console.log('Exception quit', error);
  });
}

driver
  .get(url)
  .catch(logAndRethrow('Exception get'))
  .then(findNextButtons)
  .then(crawlPages)
  .then(shutdown, shutdown);
93 +