Showing 3 changed files with 183 additions and 12 deletions
crawling_br_onair_date.js
0 → 100644
var async = require('async');
var webdriver = require('selenium-webdriver');
var options = {desiredCapabilities: {browserName: 'chrome'}};
var JASON = require('jason');
const {Builder, By, Key, until} = require('selenium-webdriver');
var iconv = require('iconv-lite');

// Script purpose: open the Naver search result for one programme, read its
// broadcast summary line and, when the programme is currently on air, type a
// follow-up query (channel + weekday + "편성표") into the search box and submit it.

// Naver search result page for the query "런닝맨" (URL-encoded in `query`).
const url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=%EB%9F%B0%EB%8B%9D%EB%A7%A8";

// Status text that marks a programme as currently on air.
const ON_AIR = "방영중";

// Keyword appended to the follow-up query. WebDriver's sendKeys takes ordinary
// (Unicode) JavaScript strings, so the text is passed as-is; encoding it to
// EUC-KR bytes and reading them back as a binary string types mojibake.
const SCHEDULE_KEYWORD = "편성표";

/**
 * Slice the broadcast summary line into its fields.
 *
 * NOTE(review): the character offsets are carried over unchanged and assume a
 * fixed-width layout of the summary text — confirm against the live page.
 *
 * @param {string} text - text of the `.inline` element in the broadcast box
 * @returns {{broadcasting: string, status: string, day: string, time: string}}
 */
function parseBroadcastInfo(text) {
    return {
        broadcasting: text.substring(0, 4),
        status: text.substring(18, 21),
        day: text.substring(23, 24) + "요일",
        time: text.substring(26, 34),
    };
}

var driver = new webdriver.Builder()
    .withCapabilities(webdriver.Capabilities.chrome())
    .build();

driver
    .get(url1)
    .then(() => driver.findElements(By.className('brcs_detail')))
    .then(broad_info => {
        // findElements resolves to an empty array when nothing matches.
        if (broad_info.length === 0) {
            throw new Error("broadcast info box ('brcs_detail') not found");
        }
        return broad_info[0].findElement(By.tagName('dl'));
    })
    .then(more_data => more_data.findElement(By.tagName('dd')))
    .then(one_line => one_line.findElement(By.className('inline')))
    .then(inlineElem => inlineElem.getText())
    .then(isbroad_string => {
        console.log(isbroad_string);
        const info = parseBroadcastInfo(isbroad_string);
        console.log(info.broadcasting, info.status, info.day, info.time);

        if (info.status !== ON_AIR) {
            return undefined;
        }

        // Each step is returned so that typing finishes before the click and
        // any failure reaches the single catch below.
        const searchBox = driver.findElement(By.id('nx_query'));
        return searchBox.clear()
            .then(() => searchBox.sendKeys(info.broadcasting, info.day, SCHEDULE_KEYWORD))
            .then(() => driver.findElement(By.className('spnew ico_search')).click());
        // TODO: the result page is not read yet, so the session is left open.
    })
    .catch(error => {
        console.error('crawling_br_onair_date failed:', error);
        process.exitCode = 1;
    });
... | @@ -4,6 +4,8 @@ var options = {desiredCapabilities: {browserName: 'chrome'}}; | ... | @@ -4,6 +4,8 @@ var options = {desiredCapabilities: {browserName: 'chrome'}}; |
4 | var JASON = require('jason') | 4 | var JASON = require('jason') |
5 | const {Builder, By, Key, until} = require('selenium-webdriver'); | 5 | const {Builder, By, Key, until} = require('selenium-webdriver'); |
6 | 6 | ||
7 | + | ||
8 | + | ||
7 | var ajason = new Object() | 9 | var ajason = new Object() |
8 | var aname = [] | 10 | var aname = [] |
9 | var asrc=[] | 11 | var asrc=[] |
... | @@ -11,7 +13,11 @@ var alink=[] | ... | @@ -11,7 +13,11 @@ var alink=[] |
11 | var driver = new webdriver.Builder(). | 13 | var driver = new webdriver.Builder(). |
12 | withCapabilities(webdriver.Capabilities.chrome()). | 14 | withCapabilities(webdriver.Capabilities.chrome()). |
13 | build(); | 15 | build(); |
14 | -let url1 = "https://people.search.naver.com/search.naver?sm=sbx_hty&where=nexearch&ie=utf8&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&x=0&y=0" | 16 | +let url1 = "https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&ie=utf8&x=0&y=0" |
17 | +var driver_1 = new webdriver.Builder(). | ||
18 | +withCapabilities(webdriver.Capabilities.chrome()). | ||
19 | +build(); | ||
20 | + | ||
15 | driver | 21 | driver |
16 | .get(url1).then(() => { | 22 | .get(url1).then(() => { |
17 | driver | 23 | driver |
... | @@ -25,21 +31,13 @@ driver | ... | @@ -25,21 +31,13 @@ driver |
25 | 31 | ||
26 | img_src[0].getAttribute("src") | 32 | img_src[0].getAttribute("src") |
27 | .then(src => { | 33 | .then(src => { |
28 | - if(src) { | 34 | + asrc.push(src) |
29 | - asrc.push(src) | 35 | + console.log(src) |
30 | - } | ||
31 | - else { | ||
32 | - asrc.push("none") | ||
33 | - } | ||
34 | }).then(() => { | 36 | }).then(() => { |
35 | - | 37 | + console.log(asrc) |
36 | - console.log(asrc,alink,aname) | ||
37 | }) | 38 | }) |
38 | - | ||
39 | - | ||
40 | }) | 39 | }) |
41 | }) | 40 | }) |
42 | - | ||
43 | whoclass.findElements(webdriver.By.tagName("strong")) | 41 | whoclass.findElements(webdriver.By.tagName("strong")) |
44 | .then(name_temp => { | 42 | .then(name_temp => { |
45 | name_temp[0].getText().then(name => { | 43 | name_temp[0].getText().then(name => { |
... | @@ -54,6 +52,86 @@ driver | ... | @@ -54,6 +52,86 @@ driver |
54 | findname[0].getAttribute("href") | 52 | findname[0].getAttribute("href") |
55 | .then(link => { | 53 | .then(link => { |
56 | alink.push(link) | 54 | alink.push(link) |
55 | + }).then(() => { | ||
56 | + driver_1.get(alink[0]).then(() => { | ||
57 | + driver_1 | ||
58 | + .findElement(webdriver.By.id('pagination_76')) | ||
59 | + .then(paginationBtn => { | ||
60 | + paginationBtn.findElements(webdriver.By.className('bt_next')) | ||
61 | + .then(elemsBtn => { | ||
62 | + var cnt = 1; | ||
63 | + | ||
64 | + function getContentsAndClickNext (callback) { | ||
65 | + console.log('higetcontests', cnt); | ||
66 | + cnt++; | ||
67 | + driver_1 | ||
68 | + .findElement(webdriver.By.id('listUI_76')) | ||
69 | + .then(contentsUI => { | ||
70 | + contentsUI | ||
71 | + .findElements(webdriver.By.tagName('li')) | ||
72 | + .then(elems => { | ||
73 | + elems.forEach(elem => { | ||
74 | + elem | ||
75 | + .getText() | ||
76 | + .then(text => { | ||
77 | + console.log(text); | ||
78 | + // 내 추측 : stale해지면, boolean 값으로 driver 어딘가에 true설정되는것 같아 | ||
79 | + // wait이라는 함수를 써서, 이게 false 가 될 때까지 기다린 다음에 getText()를 해. | ||
80 | + // ok? | ||
81 | + }) | ||
82 | + .catch(error => { | ||
83 | + // 문제가 있으면 이곳을 의심해볼것! 변수 error를 체크해보기! | ||
84 | + // console.log('really?? exception!'); | ||
85 | + }) | ||
86 | + }) | ||
87 | + | ||
88 | + elemsBtn[0].click(); | ||
89 | + | ||
90 | + sleep(50).then(() => { | ||
91 | + // Do something after the sleep! | ||
92 | + }); | ||
93 | + callback(); | ||
94 | + }) | ||
95 | + .catch(error => { | ||
96 | + console.log("asdfasdf!!', e"); | ||
97 | + }) | ||
98 | + }) | ||
99 | + .catch(error => { | ||
100 | + console.log('fuck!@#', error); | ||
101 | + }) | ||
102 | + } | ||
103 | + | ||
104 | + async.whilst( | ||
105 | + function() { | ||
106 | + console.log('whilist result', cnt < 5); | ||
107 | + return cnt < 5; | ||
108 | + }, | ||
109 | + getContentsAndClickNext, | ||
110 | + function(e) { | ||
111 | + console.log('Exception', e) | ||
112 | + } | ||
113 | + ); | ||
114 | + }) | ||
115 | + .catch(error => { | ||
116 | + console.log('Exception 4444', error); | ||
117 | + }); // 다음 컨텐츠로 가는 버튼(2개임. 하나는 평소용 두번째는 더이상 갈 페이지가 없을 때 쓰는 버튼) | ||
118 | + }) | ||
119 | + .catch(error => { | ||
120 | + console.log('Exception 555', error); | ||
121 | + }); // 방송 컨텐츠 페이지네이션 버튼 | ||
122 | + | ||
123 | + | ||
124 | + // sleep time expects milliseconds | ||
125 | + function sleep (time) { | ||
126 | + return new Promise((resolve) => setTimeout(resolve, time)); | ||
127 | + } | ||
128 | + | ||
129 | + // Usage! | ||
130 | + sleep(8000).then(() => { | ||
131 | + // Do something after the sleep! | ||
132 | + }); | ||
133 | + | ||
134 | + }); | ||
57 | }) | 135 | }) |
58 | }) | 136 | }) |
59 | }) | 137 | }) | ... | ... |
crawling_time.js
0 → 100644
var async = require('async');
var webdriver = require('selenium-webdriver');
var options = {desiredCapabilities: {browserName: 'chrome'}};
var JASON = require('jason');
const {Builder, By, Key, until} = require('selenium-webdriver');
var iconv = require('iconv-lite');

// Script purpose: open a Naver TV-schedule search result and print every
// schedule-table row whose text contains the programme title.

// Programme title to look for in the schedule rows.
var findtitle = "런닝맨";

// Naver search result page whose `query` is "SBS 일요일 편성표" (URL-encoded).
const url1 = "https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=SBS+%EC%9D%BC%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&oquery=SBS+%EC%9B%94%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&tqi=UsDTSlpySD0ssv33OfVssssssXK-499490";

var driver = new webdriver.Builder()
    .withCapabilities(webdriver.Capabilities.chrome())
    .build();

driver
    .get(url1)
    .then(() => driver.findElements(By.className('cont_inner type_day _scheduleArea')))
    .then(contentsearch_section => {
        // findElements resolves to an empty array when nothing matches.
        if (contentsearch_section.length === 0) {
            throw new Error('schedule area not found on the page');
        }
        return contentsearch_section[0].findElements(By.tagName('tbody'));
    })
    .then(tbody => {
        if (tbody.length === 0) {
            throw new Error('schedule table body not found');
        }
        return tbody[0].findElements(By.tagName('tr'));
    })
    // Wait for every row's text so failures are not lost in floating promises.
    .then(trs => Promise.all(trs.map(tr => tr.getText())))
    .then(rows => {
        rows
            .filter(content => content.includes(findtitle))
            .forEach(content => console.log(content));
    })
    .catch(error => {
        console.error('crawling_time failed:', error);
        process.exitCode = 1;
    })
    // Runs after success or failure: close the browser and end the session.
    .then(() => driver.quit());
-
Please register or login to post a comment