crawling_samename.js
7.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Third-party modules used by this crawler script.
const async = require('async');
const webdriver = require('selenium-webdriver');
// Capability description for Chrome; not referenced by the visible code.
const options = {
  desiredCapabilities: {
    browserName: 'chrome',
  },
};
const JASON = require('jason');
const {Builder, By, Key, until} = require('selenium-webdriver');

// Result holders filled in while crawling: display names, thumbnail image
// URLs and profile links of the people found on the search page.
const ajason = {};
const aname = [];
const asrc = [];
const alink = [];

/**
 * Starts a new Chrome session through selenium-webdriver.
 *
 * @returns {object} The WebDriver instance produced by `Builder#build()`.
 */
const openChromeSession = () => {
  const chromeCapabilities = webdriver.Capabilities.chrome();
  return new webdriver.Builder().withCapabilities(chromeCapabilities).build();
};

// Browser used for the people-search result page.
const driver = openChromeSession();

// People-search URL; the query parameter is a URL-encoded Korean name.
const url1 =
  'https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&ie=utf8&x=0&y=0';

// Second browser, used to open a profile link taken from the search results.
const driver_1 = openChromeSession();
/** The page counter starts at 1 and paging stops once it reaches this value. */
const PAGE_COUNTER_LIMIT = 5;

/** Pause, in milliseconds, between clicking "next" and reading the list again. */
const NEXT_PAGE_PAUSE_MS = 50;

/**
 * Resolves after the given delay.
 *
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<void>} Promise that settles once the delay has elapsed.
 */
function sleep(time) {
  return new Promise((resolve) => setTimeout(resolve, time));
}

/**
 * Reads one `result_profile` element and records what it finds in the
 * module-level arrays: thumbnail URLs in `asrc`, the first `<strong>` text in
 * `aname`, and the href of the first `.who .name` element in `alink`.
 * Missing elements are skipped instead of raising a TypeError.
 *
 * @param {object} profile - WebElement for a single search result.
 * @returns {Promise<void>}
 */
async function collectProfile(profile) {
  const thumbs = await profile.findElements(By.className('thmb'));
  for (const thumb of thumbs) {
    const images = await thumb.findElements(By.className('thmb_img'));
    if (images.length === 0) {
      continue;
    }
    const src = await images[0].getAttribute('src');
    asrc.push(src);
    console.log(src);
    console.log(asrc);
  }

  const strongs = await profile.findElements(By.tagName('strong'));
  if (strongs.length > 0) {
    aname.push(await strongs[0].getText());
  }

  const whoBoxes = await profile.findElements(By.className('who'));
  if (whoBoxes.length === 0) {
    return;
  }
  const nameLinks = await whoBoxes[0].findElements(By.className('name'));
  if (nameLinks.length === 0) {
    return;
  }
  alink.push(await nameLinks[0].getAttribute('href'));
}

/**
 * Logs the text of every `<li>` inside `#listUI_76` on the page currently
 * open in `driver_1`, then clicks the first `bt_next` button of
 * `#pagination_76` and repeats while the page counter is below
 * PAGE_COUNTER_LIMIT.
 *
 * Each page is read completely before the next click is issued, so the list
 * elements are not invalidated while their text is still being fetched.
 * Paging stops at the first page that cannot be read, or when no `bt_next`
 * button exists.
 *
 * @returns {Promise<void>}
 */
async function logContentPages() {
  let pagination;
  try {
    pagination = await driver_1.findElement(By.id('pagination_76'));
  } catch (error) {
    console.log('Exception 555', error);
    return;
  }

  let nextButtons;
  try {
    nextButtons = await pagination.findElements(By.className('bt_next'));
  } catch (error) {
    console.log('Exception 4444', error);
    return;
  }
  // Per the original author's note there are two such buttons; the first is
  // the regular "next" button. It may be absent, hence the check further down.
  const nextButton = nextButtons[0];

  let cnt = 1;
  const hasPagesLeft = () => {
    const result = cnt < PAGE_COUNTER_LIMIT;
    console.log('whilist result', result);
    return result;
  };

  while (hasPagesLeft()) {
    console.log('higetcontests', cnt);
    cnt++;
    try {
      const list = await driver_1.findElement(By.id('listUI_76'));
      const items = await list.findElements(By.tagName('li'));
      for (const item of items) {
        try {
          console.log(await item.getText());
        } catch (error) {
          // A single unreadable item should not abort the rest of the page.
          console.log('Failed to read list item text', error);
        }
      }
      if (!nextButton) {
        break;
      }
      await nextButton.click();
      await sleep(NEXT_PAGE_PAUSE_MS);
    } catch (error) {
      console.log('Failed to read content page', error);
      break;
    }
  }
}

/**
 * Quits a browser session, logging instead of throwing when that fails.
 *
 * @param {object} browser - WebDriver instance to shut down.
 * @returns {Promise<void>}
 */
async function quitQuietly(browser) {
  try {
    await browser.quit();
  } catch (error) {
    console.log('Failed to quit browser', error);
  }
}

/**
 * Entry point of the crawl:
 *  1. opens `url1` in `driver` and collects every `result_profile` entry,
 *  2. opens the first collected profile link in `driver_1`,
 *  3. logs the paginated content list found on that page,
 *  4. quits both browsers, whether or not the crawl succeeded.
 *
 * Profiles are processed one after another, and the detail page is opened
 * once after all profiles have been collected, so the shared `driver_1`
 * session is not navigated again while it is being read.
 *
 * @returns {Promise<void>}
 */
async function crawlSameNameProfiles() {
  try {
    await driver.get(url1);
    const profiles = await driver.findElements(By.className('result_profile'));
    for (const profile of profiles) {
      try {
        await collectProfile(profile);
      } catch (error) {
        // Keep going: one broken result must not lose the others.
        console.log('Failed to collect profile', error);
      }
    }

    const firstLink = alink[0];
    if (!firstLink) {
      console.log('No profile link found');
      return;
    }
    await driver_1.get(firstLink);
    await logContentPages();
  } finally {
    await Promise.all([quitQuietly(driver), quitQuietly(driver_1)]);
  }
}

crawlSameNameProfiles().catch((error) => {
  console.log('Exception', error);
});