Showing
5 changed files
with
136 additions
and
139 deletions
... | @@ -5,10 +5,9 @@ var JASON = require('jason') | ... | @@ -5,10 +5,9 @@ var JASON = require('jason') |
5 | const {Builder, By, Key, until} = require('selenium-webdriver'); | 5 | const {Builder, By, Key, until} = require('selenium-webdriver'); |
6 | var iconv = require('iconv-lite') | 6 | var iconv = require('iconv-lite') |
7 | 7 | ||
8 | -var driver = new webdriver.Builder(). | 8 | +module.exports = { |
9 | - withCapabilities(webdriver.Capabilities.chrome()). | 9 | + search_onairanddate: function(driver,string) { |
10 | - build(); | 10 | + let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=" + string |
11 | - let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=%EB%9F%B0%EB%8B%9D%EB%A7%A8" | ||
12 | driver | 11 | driver |
13 | .get(url1).then(() => { | 12 | .get(url1).then(() => { |
14 | driver | 13 | driver |
... | @@ -41,11 +40,11 @@ var driver = new webdriver.Builder(). | ... | @@ -41,11 +40,11 @@ var driver = new webdriver.Builder(). |
41 | //encodestr = encodestr.toUpperCase(); | 40 | //encodestr = encodestr.toUpperCase(); |
42 | //console.log(iconv.encode(encodestr, 'EUC-KR').toString()) | 41 | //console.log(iconv.encode(encodestr, 'EUC-KR').toString()) |
43 | //console.log(encodestr) | 42 | //console.log(encodestr) |
44 | - var buf = iconv.encode('편성표','euckr') | 43 | + //var buf = iconv.encode('편성표','euckr') |
45 | - var param = buf.toString('binary') | 44 | + //var param = buf.toString('binary') |
46 | - console.log(param) | 45 | + //console.log(param) |
47 | - driver.findElement(webdriver.By.id('nx_query')).sendKeys(broadcasting,br_date,param) | 46 | + //driver.findElement(webdriver.By.id('nx_query')).sendKeys(broadcasting,br_date,param) |
48 | - driver.findElement(webdriver.By.className('spnew ico_search')).click() | 47 | + //driver.findElement(webdriver.By.className('spnew ico_search')).click() |
49 | }) | 48 | }) |
50 | 49 | ||
51 | } | 50 | } |
... | @@ -58,3 +57,5 @@ var driver = new webdriver.Builder(). | ... | @@ -58,3 +57,5 @@ var driver = new webdriver.Builder(). |
58 | }) | 57 | }) |
59 | }) | 58 | }) |
60 | }) | 59 | }) |
60 | + } | ||
61 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -8,13 +8,8 @@ const screen = { | ... | @@ -8,13 +8,8 @@ const screen = { |
8 | }; | 8 | }; |
9 | var a=0 | 9 | var a=0 |
10 | var breaktheloop = false | 10 | var breaktheloop = false |
11 | -var driver = new webdriver.Builder(). | 11 | +module.exports = { |
12 | - withCapabilities(webdriver.Capabilities.chrome()). | 12 | + search_broadcasting : function(driver,url) { |
13 | - build(); | ||
14 | - | ||
15 | - | ||
16 | - | ||
17 | - let url = 'https://people.search.naver.com/search.naver?where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&sm=tab_etc&ie=utf8&key=PeopleService&os=94702'; | ||
18 | driver | 13 | driver |
19 | .get(url) | 14 | .get(url) |
20 | .then(() => { | 15 | .then(() => { |
... | @@ -75,3 +70,5 @@ var driver = new webdriver.Builder(). | ... | @@ -75,3 +70,5 @@ var driver = new webdriver.Builder(). |
75 | function sleep (time) { | 70 | function sleep (time) { |
76 | return new Promise((resolve) => setTimeout(resolve, time)); | 71 | return new Promise((resolve) => setTimeout(resolve, time)); |
77 | } | 72 | } |
73 | + } | ||
74 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
1 | var async = require('async'); | 1 | var async = require('async'); |
2 | var webdriver = require('selenium-webdriver'); | 2 | var webdriver = require('selenium-webdriver'); |
3 | -var options = {desiredCapabilities: {browserName: 'chrome'}}; | 3 | +//var options = {desiredCapabilities: {browserName: 'chrome'}}; |
4 | -var JASON = require('jason') | 4 | +var chrome = require('selenium-webdriver/chrome') |
5 | -const {Builder, By, Key, until} = require('selenium-webdriver'); | 5 | +var Options = new chrome.Options(); |
6 | +Options.addArguments('headless') | ||
7 | +Options.addArguments('disable-gpu') | ||
8 | +//var JASON = require('jason') | ||
9 | +//var util = require('util') | ||
10 | +//var EventEmitter = require('events') | ||
11 | +var check = 0 | ||
12 | +var testStartRunTime = 0; | ||
13 | +var testruntime = 0; | ||
6 | 14 | ||
15 | +const {Builder, By, Key, until} = require('selenium-webdriver'); | ||
7 | 16 | ||
17 | +var next_link_list = [] | ||
18 | +var src_list = [] | ||
19 | +var text_list = [] | ||
20 | +var return_list = [] | ||
21 | +module.exports = { | ||
22 | + first_search : function(driver,string) { | ||
8 | 23 | ||
9 | -var ajason = new Object() | 24 | + function testRunTimer() { |
10 | -var aname = [] | 25 | + var today = new Date(); // 현재시간 얻기 |
11 | -var asrc=[] | 26 | + var runTime = today.getTime(); // 밀리세컨드 ( 1970/01/01 부터 현재까지의 시간을 밀리세컨드로 나타냄 ) |
12 | -var alink=[] | 27 | + var rtn = 0; |
13 | -var driver = new webdriver.Builder(). | ||
14 | -withCapabilities(webdriver.Capabilities.chrome()). | ||
15 | -build(); | ||
16 | -let url1 = "https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&ie=utf8&x=0&y=0" | ||
17 | -var driver_1 = new webdriver.Builder(). | ||
18 | -withCapabilities(webdriver.Capabilities.chrome()). | ||
19 | -build(); | ||
20 | 28 | ||
21 | -driver | 29 | + if (testStartRunTime == 0) { |
22 | -.get(url1).then(() => { | 30 | + testStartRunTime = runTime; |
31 | + } else { | ||
32 | + rtn = (runTime - testStartRunTime) / 1000; | ||
33 | + testStartRunTime = 0; | ||
34 | + } | ||
35 | + return rtn; | ||
36 | + } | ||
37 | + let url1 = "https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query="+string+"&ie=utf8&x=0&y=0" | ||
38 | + driver | ||
39 | + .get(url1).then(() => { | ||
40 | + testRunTimer() | ||
23 | driver | 41 | driver |
24 | .findElements(webdriver.By.className('result_profile')) | 42 | .findElements(webdriver.By.className('result_profile')) |
25 | - .then(whoclasses=> { | 43 | + .then(profiles => { |
26 | - whoclasses.forEach(whoclass => { | 44 | + profiles.forEach(profile => { |
27 | - whoclass.findElements(webdriver.By.className('thmb')) | 45 | + profile.findElements(webdriver.By.className('thmb')) |
28 | - .then(img_thmbs => { | 46 | + .then(links=> { |
29 | - img_thmbs.forEach(img_thmb => { | 47 | + links.forEach(link => { |
30 | - img_thmb.findElements(webdriver.By.className('thmb_img')).then(img_src => { | 48 | + link.getAttribute('href') |
31 | - | 49 | + .then(next_link => { |
32 | - img_src[0].getAttribute("src") | 50 | + profile.findElements(webdriver.By.className('thmb_img')) |
51 | + .then((imgs) => { | ||
52 | + imgs.forEach(img => { | ||
53 | + img.getAttribute('src') | ||
33 | .then(src => { | 54 | .then(src => { |
34 | - asrc.push(src) | 55 | + profile.findElements(webdriver.By.className('who')) |
35 | - console.log(src) | 56 | + .then(whos => { |
36 | - }).then(() => { | 57 | + whos.forEach(who => { |
37 | - console.log(asrc) | 58 | + who.getText() |
38 | - }) | ||
39 | - }) | ||
40 | - }) | ||
41 | - whoclass.findElements(webdriver.By.tagName("strong")) | ||
42 | - .then(name_temp => { | ||
43 | - name_temp[0].getText().then(name => { | ||
44 | - aname.push(name) | ||
45 | - }) | ||
46 | - }) | ||
47 | - }) | ||
48 | - whoclass.findElements(webdriver.By.className('who')) | ||
49 | - .then(who => { | ||
50 | - who[0].findElements(webdriver.By.className('name')) | ||
51 | - .then(findname => { | ||
52 | - findname[0].getAttribute("href") | ||
53 | - .then(link => { | ||
54 | - alink.push(link) | ||
55 | - }).then(() => { | ||
56 | - driver_1.get(alink[0]).then(() => { | ||
57 | - driver_1 | ||
58 | - .findElement(webdriver.By.id('pagination_76')) | ||
59 | - .then(paginationBtn => { | ||
60 | - paginationBtn.findElements(webdriver.By.className('bt_next')) | ||
61 | - .then(elemsBtn => { | ||
62 | - var cnt = 1; | ||
63 | - | ||
64 | - function getContentsAndClickNext (callback) { | ||
65 | - console.log('higetcontests', cnt); | ||
66 | - cnt++; | ||
67 | - driver_1 | ||
68 | - .findElement(webdriver.By.id('listUI_76')) | ||
69 | - .then(contentsUI => { | ||
70 | - contentsUI | ||
71 | - .findElements(webdriver.By.tagName('li')) | ||
72 | - .then(elems => { | ||
73 | - elems.forEach(elem => { | ||
74 | - elem | ||
75 | - .getText() | ||
76 | .then(text => { | 59 | .then(text => { |
77 | - console.log(text); | 60 | + next_link_list.push(next_link) |
78 | - // 내 추측 : stale해지면, boolean 값으로 driver 어딘가에 true설정되는것 같아 | 61 | + src_list.push(src) |
79 | - // wait이라는 함수를 써서, 이게 false 가 될 때까지 기다린 다음에 getText()를 해. | 62 | + text_list.push(text) |
80 | - // ok? | 63 | + testruntime += testRunTimer() |
81 | - }) | 64 | + console.log(testruntime) |
82 | - .catch(error => { | 65 | + setTimeout(function() { |
83 | - // 문제가 있으면 이곳을 의심해볼것! 변수 error를 체크해보기! | 66 | + check++; |
84 | - // console.log('really?? exception!'); | 67 | + if(check==1) { |
68 | + return_list.push(next_link_list) | ||
69 | + return_list.push(src_list) | ||
70 | + return_list.push(text_list) | ||
71 | + console.log(testruntime) | ||
72 | + setTimeout(function() { | ||
73 | + console.log(return_list) | ||
74 | + return return_list | ||
75 | + },500) | ||
76 | + } | ||
77 | + },testruntime+1000) | ||
85 | }) | 78 | }) |
86 | }) | 79 | }) |
87 | - | ||
88 | - elemsBtn[0].click(); | ||
89 | - | ||
90 | - sleep(50).then(() => { | ||
91 | - // Do something after the sleep! | ||
92 | - }); | ||
93 | - callback(); | ||
94 | }) | 80 | }) |
95 | - .catch(error => { | ||
96 | - console.log("asdfasdf!!', e"); | ||
97 | }) | 81 | }) |
98 | }) | 82 | }) |
99 | - .catch(error => { | ||
100 | - console.log('fuck!@#', error); | ||
101 | }) | 83 | }) |
102 | - } | ||
103 | - | ||
104 | - async.whilst( | ||
105 | - function() { | ||
106 | - console.log('whilist result', cnt < 5); | ||
107 | - return cnt < 5; | ||
108 | - }, | ||
109 | - getContentsAndClickNext, | ||
110 | - function(e) { | ||
111 | - console.log('Exception', e) | ||
112 | - } | ||
113 | - ); | ||
114 | }) | 84 | }) |
115 | - .catch(error => { | ||
116 | - console.log('Exception 4444', error); | ||
117 | - }); // 다음 컨텐츠로 가는 버튼(2개임. 하나는 평소용 두번째는 더이상 갈 페이지가 없을 때 쓰는 버튼) | ||
118 | }) | 85 | }) |
119 | - .catch(error => { | ||
120 | - console.log('Exception 555', error); | ||
121 | - }); // 방송 컨텐츠 페이지네이션 버튼 | ||
122 | - | ||
123 | - | ||
124 | - // sleep time expects milliseconds | ||
125 | - function sleep (time) { | ||
126 | - return new Promise((resolve) => setTimeout(resolve, time)); | ||
127 | - } | ||
128 | - | ||
129 | - // Usage! | ||
130 | - sleep(8000).then(() => { | ||
131 | - // Do something after the sleep! | ||
132 | - }); | ||
133 | - | ||
134 | - }); | ||
135 | }) | 86 | }) |
136 | }) | 87 | }) |
137 | }) | 88 | }) |
138 | }) | 89 | }) |
90 | + } | ||
91 | +} | ||
139 | 92 | ||
140 | - }) | ||
141 | -}) | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
... | @@ -5,11 +5,10 @@ var JASON = require('jason') | ... | @@ -5,11 +5,10 @@ var JASON = require('jason') |
5 | const {Builder, By, Key, until} = require('selenium-webdriver'); | 5 | const {Builder, By, Key, until} = require('selenium-webdriver'); |
6 | var iconv = require('iconv-lite') | 6 | var iconv = require('iconv-lite') |
7 | 7 | ||
8 | -var findtitle = "런닝맨" | 8 | + |
9 | -var driver = new webdriver.Builder(). | 9 | +module.exports = { |
10 | - withCapabilities(webdriver.Capabilities.chrome()). | 10 | + search_broadcasting_time : function(driver,broadcast,day,findtitle) { |
11 | - build(); | 11 | + let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=0&ie=utf8&query="+broadcast+"+"+day+"+편성표" |
12 | - let url1 = "https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=SBS+%EC%9D%BC%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&oquery=SBS+%EC%9B%94%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&tqi=UsDTSlpySD0ssv33OfVssssssXK-499490" | ||
13 | driver | 12 | driver |
14 | .get(url1).then(() => { | 13 | .get(url1).then(() => { |
15 | driver | 14 | driver |
... | @@ -31,3 +30,5 @@ var driver = new webdriver.Builder(). | ... | @@ -31,3 +30,5 @@ var driver = new webdriver.Builder(). |
31 | }) | 30 | }) |
32 | }) | 31 | }) |
33 | }) | 32 | }) |
33 | + } | ||
34 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
main.js
0 → 100644
1 | +var async = require('async'); | ||
2 | +var webdriver = require('selenium-webdriver'); | ||
3 | +//var options = {desiredCapabilities: {browserName: 'chrome'}}; | ||
4 | +var chrome = require('selenium-webdriver/chrome') | ||
5 | +var Options = new chrome.Options(); | ||
6 | +Options.addArguments('headless') | ||
7 | +Options.addArguments('disable-gpu') | ||
8 | +var JASON = require('jason') | ||
9 | +const test1 = require('./crawling_samename') | ||
10 | +const test2 = require('./crawling_broadcasting') | ||
11 | +const test3 = require('./crawling_br_onair_date') | ||
12 | +const test4 = require('./crawling_time') | ||
13 | + | ||
14 | +const {Builder, By, Key, until} = require('selenium-webdriver'); | ||
15 | +var driver = new webdriver.Builder() | ||
16 | +.forBrowser('chrome') | ||
17 | +.withCapabilities(webdriver.Capabilities.chrome()) | ||
18 | +.setChromeOptions(Options) | ||
19 | +.build(); | ||
20 | +samename_list = [] | ||
21 | +samename_list = test1.first_search(driver,"유재석") | ||
22 | + | ||
23 | +var driver_1 = new webdriver.Builder() | ||
24 | +.forBrowser('chrome') | ||
25 | +.withCapabilities(webdriver.Capabilities.chrome()) | ||
26 | +.setChromeOptions(Options) | ||
27 | +.build(); | ||
28 | + | ||
29 | +test2.search_broadcasting(driver_1,"https://people.search.naver.com/search.naver?where=nexearch&sm=tab_ppn&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&os=94702&ie=utf8&key=PeopleService") | ||
30 | + | ||
31 | + | ||
32 | +var driver_2 = new webdriver.Builder() | ||
33 | +.forBrowser('chrome') | ||
34 | +.withCapabilities(webdriver.Capabilities.chrome()) | ||
35 | +.setChromeOptions(Options) | ||
36 | +.build(); | ||
37 | + | ||
38 | +test3.search_onairanddate(driver_2,"런닝맨") | ||
39 | + | ||
40 | + | ||
41 | +var driver_3 = new webdriver.Builder() | ||
42 | +.forBrowser('chrome') | ||
43 | +.withCapabilities(webdriver.Capabilities.chrome()) | ||
44 | +.setChromeOptions(Options) | ||
45 | +.build(); | ||
46 | + | ||
47 | +test4.search_broadcasting_time(driver_3,"SBS","일요일","런닝맨") | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment