김승훈

add crawling_time,samename,more_data

1 +var async = require('async');
2 +var webdriver = require('selenium-webdriver');
3 +var options = {desiredCapabilities: {browserName: 'chrome'}};
4 +var JASON = require('jason')
5 +const {Builder, By, Key, until} = require('selenium-webdriver');
6 +var iconv = require('iconv-lite')
7 +
/*
 * Broadcast-status crawler.
 *
 * Opens the Naver search result page in `url1`, reads the one-line broadcast
 * summary of the first `brcs_detail` box and, when the programme is still
 * airing, runs a follow-up search for its broadcast schedule.
 */
const driver = new webdriver.Builder()
  .withCapabilities(webdriver.Capabilities.chrome())
  .build();
const url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=%EB%9F%B0%EB%8B%9D%EB%A7%A8";

/**
 * Cuts the summary line into its fields using fixed character offsets.
 *
 * NOTE(review): the offsets presumably mirror the exact layout of Naver's
 * summary text; verify them against the live page, since any layout change
 * silently shifts every field.
 *
 * @param {string} line - Text of the `.inline` element.
 * @returns {{broadcasting: string, status: string, br_date: string, br_time: string}}
 */
function parseBroadcastLine(line) {
  return {
    broadcasting: line.substring(0, 4),
    status: line.substring(18, 21),
    br_date: line.substring(23, 24) + "요일",
    br_time: line.substring(26, 34),
  };
}

/**
 * Replaces the text of the search box with the schedule query and submits it.
 * Every step waits for the previous one, so the click cannot overtake typing.
 *
 * @param {{broadcasting: string, br_date: string}} info - Parsed summary fields.
 * @returns {Promise} Settles once the search button has been clicked.
 */
function searchSchedule(info) {
  return driver.findElement(webdriver.By.id('nx_query')).clear()
    .then(() => {
      // sendKeys takes ordinary (Unicode) JavaScript strings. Typing EUC-KR
      // bytes decoded as 'binary' would enter mojibake into the search box.
      return driver.findElement(webdriver.By.id('nx_query'))
        .sendKeys(info.broadcasting, info.br_date, '편성표');
    })
    .then(() => driver.findElement(webdriver.By.className('spnew ico_search')).click());
}

driver.get(url1)
  .then(() => driver.findElements(webdriver.By.className('brcs_detail')))
  .then(broad_info => {
    if (broad_info.length === 0) {
      throw new Error('no element with class "brcs_detail" on the result page');
    }
    return broad_info[0].findElement(webdriver.By.tagName('dl'));
  })
  .then(more_data => more_data.findElement(webdriver.By.tagName('dd')))
  .then(one_line => one_line.findElement(webdriver.By.className("inline")))
  .then(statusElement => statusElement.getText())
  .then(isbroad_string => {
    console.log(isbroad_string);
    const info = parseBroadcastLine(isbroad_string);
    console.log(info.broadcasting, info.status, info.br_date, info.br_time);

    // Only programmes that are currently airing have a schedule to look up.
    if (info.status !== "방영중") {
      return undefined;
    }
    // On success the browser is left on the schedule result page.
    return searchSchedule(info);
  })
  .catch(error => {
    // Report the failure instead of leaving an unhandled rejection, and close
    // the browser session that would otherwise be orphaned.
    console.error('broadcast crawl failed', error);
    process.exitCode = 1;
    return driver.quit();
  });
...@@ -4,6 +4,8 @@ var options = {desiredCapabilities: {browserName: 'chrome'}}; ...@@ -4,6 +4,8 @@ var options = {desiredCapabilities: {browserName: 'chrome'}};
4 var JASON = require('jason') 4 var JASON = require('jason')
5 const {Builder, By, Key, until} = require('selenium-webdriver'); 5 const {Builder, By, Key, until} = require('selenium-webdriver');
6 6
7 +
8 +
7 var ajason = new Object() 9 var ajason = new Object()
8 var aname = [] 10 var aname = []
9 var asrc=[] 11 var asrc=[]
...@@ -11,7 +13,11 @@ var alink=[] ...@@ -11,7 +13,11 @@ var alink=[]
11 var driver = new webdriver.Builder(). 13 var driver = new webdriver.Builder().
12 withCapabilities(webdriver.Capabilities.chrome()). 14 withCapabilities(webdriver.Capabilities.chrome()).
13 build(); 15 build();
14 -let url1 = "https://people.search.naver.com/search.naver?sm=sbx_hty&where=nexearch&ie=utf8&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&x=0&y=0" 16 +let url1 = "https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&ie=utf8&x=0&y=0"
17 +var driver_1 = new webdriver.Builder().
18 +withCapabilities(webdriver.Capabilities.chrome()).
19 +build();
20 +
15 driver 21 driver
16 .get(url1).then(() => { 22 .get(url1).then(() => {
17 driver 23 driver
...@@ -25,21 +31,13 @@ driver ...@@ -25,21 +31,13 @@ driver
25 31
26 img_src[0].getAttribute("src") 32 img_src[0].getAttribute("src")
27 .then(src => { 33 .then(src => {
28 - if(src) { 34 + asrc.push(src)
29 - asrc.push(src) 35 + console.log(src)
30 - }
31 - else {
32 - asrc.push("none")
33 - }
34 }).then(() => { 36 }).then(() => {
35 - 37 + console.log(asrc)
36 - console.log(asrc,alink,aname)
37 }) 38 })
38 -
39 -
40 }) 39 })
41 }) 40 })
42 -
43 whoclass.findElements(webdriver.By.tagName("strong")) 41 whoclass.findElements(webdriver.By.tagName("strong"))
44 .then(name_temp => { 42 .then(name_temp => {
45 name_temp[0].getText().then(name => { 43 name_temp[0].getText().then(name => {
...@@ -54,6 +52,86 @@ driver ...@@ -54,6 +52,86 @@ driver
54 findname[0].getAttribute("href") 52 findname[0].getAttribute("href")
55 .then(link => { 53 .then(link => {
56 alink.push(link) 54 alink.push(link)
55 + }).then(() => {
56 + driver_1.get(alink[0]).then(() => {
57 + driver_1
58 + .findElement(webdriver.By.id('pagination_76'))
59 + .then(paginationBtn => {
60 + paginationBtn.findElements(webdriver.By.className('bt_next'))
61 + .then(elemsBtn => {
62 + var cnt = 1;
63 +
/**
 * Iteratee for `async.whilst`: prints every `<li>` of the `listUI_76` list,
 * then clicks the first "next" button and pauses briefly.
 *
 * The texts are read to completion BEFORE the click, because clicking replaces
 * the list and makes the previously located elements stale. `callback` is
 * invoked exactly once: with no argument after the click and pause finished,
 * or with the error when locating the list, reading it, or clicking failed
 * (so the surrounding loop ends instead of hanging).
 *
 * Relies on `cnt`, `driver_1`, `elemsBtn`, `sleep` and `webdriver` from the
 * enclosing scope.
 *
 * @param {function(Error=)} callback - Continuation supplied by `async.whilst`.
 */
function getContentsAndClickNext (callback) {
  console.log('higetcontests', cnt);
  cnt++;

  driver_1
    .findElement(webdriver.By.id('listUI_76'))
    .then(
      contentsUI => contentsUI
        .findElements(webdriver.By.tagName('li'))
        .then(elems => Promise.all(elems.map(elem =>
          elem
            .getText()
            .then(text => {
              console.log(text);
            })
            .catch(error => {
              // A single unreadable item must not abort the page; report it
              // rather than swallowing it silently.
              console.log('getText failed', error);
            })
        )))
        .then(() => elemsBtn[0].click())
        .then(() => sleep(50))
        .catch(error => {
          console.log('asdfasdf!!', error);
          throw error;
        }),
      error => {
        console.log('fuck!@#', error);
        throw error;
      }
    )
    .then(() => callback(), error => callback(error));
}
103 +
104 + async.whilst(
105 + function() {
106 + console.log('whilist result', cnt < 5);
107 + return cnt < 5;
108 + },
109 + getContentsAndClickNext,
110 + function(e) {
111 + console.log('Exception', e)
112 + }
113 + );
114 + })
115 + .catch(error => {
116 + console.log('Exception 4444', error);
117 + }); // 다음 컨텐츠로 가는 버튼(2개임. 하나는 평소용 두번째는 더이상 갈 페이지가 없을 때 쓰는 버튼)
118 + })
119 + .catch(error => {
120 + console.log('Exception 555', error);
121 + }); // 방송 컨텐츠 페이지네이션 버튼
122 +
123 +
/**
 * Produces a promise that is fulfilled (with no value) once the given delay
 * has passed.
 *
 * @param {number} time - Delay in milliseconds.
 * @returns {Promise<undefined>}
 */
function sleep (time) {
  return new Promise(function (done) {
    setTimeout(done, time);
  });
}
128 +
129 + // Usage!
130 + sleep(8000).then(() => {
131 + // Do something after the sleep!
132 + });
133 +
134 + });
57 }) 135 })
58 }) 136 })
59 }) 137 })
......
1 +var async = require('async');
2 +var webdriver = require('selenium-webdriver');
3 +var options = {desiredCapabilities: {browserName: 'chrome'}};
4 +var JASON = require('jason')
5 +const {Builder, By, Key, until} = require('selenium-webdriver');
6 +var iconv = require('iconv-lite')
7 +
/*
 * Schedule-row filter.
 *
 * Opens the Naver schedule search result in `url1`, reads every table row of
 * the first schedule area and prints the rows whose text contains `findtitle`.
 */
const findtitle = "런닝맨";
const driver = new webdriver.Builder()
  .withCapabilities(webdriver.Capabilities.chrome())
  .build();
const url1 = "https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=SBS+%EC%9D%BC%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&oquery=SBS+%EC%9B%94%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&tqi=UsDTSlpySD0ssv33OfVssssssXK-499490";

driver.get(url1)
  .then(() => driver.findElements(webdriver.By.className('cont_inner type_day _scheduleArea')))
  .then(contentsearch_section => {
    if (contentsearch_section.length === 0) {
      throw new Error('schedule area not found on the result page');
    }
    return contentsearch_section[0].findElements(webdriver.By.tagName('tbody'));
  })
  .then(tbody => {
    if (tbody.length === 0) {
      throw new Error('schedule area contains no <tbody>');
    }
    return tbody[0].findElements(webdriver.By.tagName('tr'));
  })
  // Collect all row texts first so the session is only closed after every
  // read has finished.
  .then(trs => Promise.all(trs.map(tr => tr.getText())))
  .then(rows => {
    rows
      .filter(content => content.includes(findtitle))
      .forEach(content => {
        console.log(content);
      });
  })
  .then(
    () => driver.quit(),
    error => {
      // Report the failure and still release the browser session.
      console.error('schedule crawl failed', error);
      process.exitCode = 1;
      return driver.quit();
    }
  );