김승훈

update main

......@@ -5,10 +5,9 @@ var JASON = require('jason')
const {Builder, By, Key, until} = require('selenium-webdriver');
var iconv = require('iconv-lite')
var driver = new webdriver.Builder().
withCapabilities(webdriver.Capabilities.chrome()).
build();
let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=%EB%9F%B0%EB%8B%9D%EB%A7%A8"
module.exports = {
search_onairanddate: function(driver,string) {
let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=" + string
driver
.get(url1).then(() => {
driver
......@@ -41,11 +40,11 @@ var driver = new webdriver.Builder().
//encodestr = encodestr.toUpperCase();
//console.log(iconv.encode(encodestr, 'EUC-KR').toString())
//console.log(encodestr)
var buf = iconv.encode('편성표','euckr')
var param = buf.toString('binary')
console.log(param)
driver.findElement(webdriver.By.id('nx_query')).sendKeys(broadcasting,br_date,param)
driver.findElement(webdriver.By.className('spnew ico_search')).click()
//var buf = iconv.encode('편성표','euckr')
//var param = buf.toString('binary')
//console.log(param)
//driver.findElement(webdriver.By.id('nx_query')).sendKeys(broadcasting,br_date,param)
//driver.findElement(webdriver.By.className('spnew ico_search')).click()
})
}
......@@ -58,3 +57,5 @@ var driver = new webdriver.Builder().
})
})
})
}
}
\ No newline at end of file
......
......@@ -8,13 +8,8 @@ const screen = {
};
var a=0
var breaktheloop = false
var driver = new webdriver.Builder().
withCapabilities(webdriver.Capabilities.chrome()).
build();
let url = 'https://people.search.naver.com/search.naver?where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&sm=tab_etc&ie=utf8&key=PeopleService&os=94702';
module.exports = {
search_broadcasting : function(driver,url) {
driver
.get(url)
.then(() => {
......@@ -75,3 +70,5 @@ var driver = new webdriver.Builder().
// Promise-based delay: the returned promise is fulfilled (with no value)
// once `time` milliseconds have passed, so callers can chain `.then()`.
function sleep (time) {
  return new Promise((wake) => {
    setTimeout(wake, time);
  });
}
}
}
\ No newline at end of file
......
var async = require('async');
var webdriver = require('selenium-webdriver');
var options = {desiredCapabilities: {browserName: 'chrome'}};
var JASON = require('jason')
const {Builder, By, Key, until} = require('selenium-webdriver');
//var options = {desiredCapabilities: {browserName: 'chrome'}};
var chrome = require('selenium-webdriver/chrome')
var Options = new chrome.Options();
Options.addArguments('headless')
Options.addArguments('disable-gpu')
//var JASON = require('jason')
//var util = require('util')
//var EventEmitter = require('events')
var check = 0
var testStartRunTime = 0;
var testruntime = 0;
const {Builder, By, Key, until} = require('selenium-webdriver');
var next_link_list = []
var src_list = []
var text_list = []
var return_list = []
module.exports = {
first_search : function(driver,string) {
var ajason = new Object()
var aname = []
var asrc=[]
var alink=[]
var driver = new webdriver.Builder().
withCapabilities(webdriver.Capabilities.chrome()).
build();
let url1 = "https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&ie=utf8&x=0&y=0"
var driver_1 = new webdriver.Builder().
withCapabilities(webdriver.Capabilities.chrome()).
build();
// Stopwatch toggle built on the module-level `testStartRunTime`:
// - when `testStartRunTime` is 0, stores the current epoch time in it and returns 0;
// - otherwise returns the seconds elapsed since the stored time and resets it to 0.
function testRunTimer() {
var today = new Date(); // get the current time
var runTime = today.getTime(); // milliseconds (time elapsed since 1970/01/01, expressed in milliseconds)
var rtn = 0;
// NOTE(review): the next two lines look like leftovers from the removed side of a diff
// rather than part of this timer — confirm against the real file.
driver
.get(url1).then(() => {
if (testStartRunTime == 0) {
// No measurement in progress: remember the start time.
testStartRunTime = runTime;
} else {
// Measurement in progress: convert the elapsed milliseconds to seconds and reset.
rtn = (runTime - testStartRunTime) / 1000;
testStartRunTime = 0;
}
return rtn;
}
let url1 = "https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query="+string+"&ie=utf8&x=0&y=0"
driver
.get(url1).then(() => {
testRunTimer()
driver
.findElements(webdriver.By.className('result_profile'))
.then(whoclasses=> {
whoclasses.forEach(whoclass => {
whoclass.findElements(webdriver.By.className('thmb'))
.then(img_thmbs => {
img_thmbs.forEach(img_thmb => {
img_thmb.findElements(webdriver.By.className('thmb_img')).then(img_src => {
img_src[0].getAttribute("src")
.then(profiles => {
profiles.forEach(profile => {
profile.findElements(webdriver.By.className('thmb'))
.then(links=> {
links.forEach(link => {
link.getAttribute('href')
.then(next_link => {
profile.findElements(webdriver.By.className('thmb_img'))
.then((imgs) => {
imgs.forEach(img => {
img.getAttribute('src')
.then(src => {
asrc.push(src)
console.log(src)
}).then(() => {
console.log(asrc)
})
})
})
whoclass.findElements(webdriver.By.tagName("strong"))
.then(name_temp => {
name_temp[0].getText().then(name => {
aname.push(name)
})
})
})
whoclass.findElements(webdriver.By.className('who'))
.then(who => {
who[0].findElements(webdriver.By.className('name'))
.then(findname => {
findname[0].getAttribute("href")
.then(link => {
alink.push(link)
}).then(() => {
driver_1.get(alink[0]).then(() => {
driver_1
.findElement(webdriver.By.id('pagination_76'))
.then(paginationBtn => {
paginationBtn.findElements(webdriver.By.className('bt_next'))
.then(elemsBtn => {
var cnt = 1;
// Iteration body handed to async.whilst: logs and increments the shared `cnt`,
// looks up the element with id 'listUI_76' through driver_1, walks its <li> items,
// clicks the first element of `elemsBtn`, and then invokes `callback`.
// NOTE(review): this span appears to interleave removed and added diff lines
// (the `profile.findElements(...)` chain and the `*_list.push(...)` / setTimeout
// section reference names not defined in this function) — confirm against the real file.
function getContentsAndClickNext (callback) {
console.log('higetcontests', cnt);
cnt++;
driver_1
.findElement(webdriver.By.id('listUI_76'))
.then(contentsUI => {
contentsUI
.findElements(webdriver.By.tagName('li'))
.then(elems => {
elems.forEach(elem => {
elem
.getText()
profile.findElements(webdriver.By.className('who'))
.then(whos => {
whos.forEach(who => {
who.getText()
.then(text => {
console.log(text);
// My guess: when an element goes stale, a boolean somewhere in the driver seems to get set to true.
// Use the wait function to wait until it becomes false, and only then call getText().
// ok?
})
.catch(error => {
// If something goes wrong, suspect this spot first! Check the `error` variable!
// console.log('really?? exception!');
next_link_list.push(next_link)
src_list.push(src)
text_list.push(text)
// Adds testRunTimer()'s return value to the running total.
testruntime += testRunTimer()
console.log(testruntime)
// Delayed by testruntime + 1000 ms; only the call that moves `check` to 1 assembles return_list.
setTimeout(function() {
check++;
if(check==1) {
return_list.push(next_link_list)
return_list.push(src_list)
return_list.push(text_list)
console.log(testruntime)
setTimeout(function() {
console.log(return_list)
// This return only leaves the timer callback; it does not hand the list to any caller.
return return_list
},500)
}
},testruntime+1000)
})
})
elemsBtn[0].click();
// The sleep promise is not awaited, so callback() below runs without waiting for it.
sleep(50).then(() => {
// Do something after the sleep!
});
callback();
})
.catch(error => {
console.log("asdfasdf!!', e");
})
})
.catch(error => {
console.log('fuck!@#', error);
})
}
async.whilst(
function() {
console.log('whilist result', cnt < 5);
return cnt < 5;
},
getContentsAndClickNext,
function(e) {
console.log('Exception', e)
}
);
})
.catch(error => {
console.log('Exception 4444', error);
}); // 다음 컨텐츠로 가는 버튼(2개임. 하나는 평소용 두번째는 더이상 갈 페이지가 없을 때 쓰는 버튼)
})
.catch(error => {
console.log('Exception 555', error);
}); // 방송 컨텐츠 페이지네이션 버튼
// Delay helper; `time` is given in milliseconds.
// The returned promise is fulfilled (with no value) when the timer fires.
function sleep (time) {
  return new Promise((wake) => {
    setTimeout(wake, time);
  });
}
// Usage!
sleep(8000).then(() => {
// Do something after the sleep!
});
});
})
})
})
})
}
}
})
})
\ No newline at end of file
......
......@@ -5,11 +5,10 @@ var JASON = require('jason')
const {Builder, By, Key, until} = require('selenium-webdriver');
var iconv = require('iconv-lite')
var findtitle = "런닝맨"
var driver = new webdriver.Builder().
withCapabilities(webdriver.Capabilities.chrome()).
build();
let url1 = "https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=SBS+%EC%9D%BC%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&oquery=SBS+%EC%9B%94%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&tqi=UsDTSlpySD0ssv33OfVssssssXK-499490"
module.exports = {
search_broadcasting_time : function(driver,broadcast,day,findtitle) {
let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=0&ie=utf8&query="+broadcast+"+"+day+"+편성표"
driver
.get(url1).then(() => {
driver
......@@ -31,3 +30,5 @@ var driver = new webdriver.Builder().
})
})
})
}
}
\ No newline at end of file
......
var async = require('async');
var webdriver = require('selenium-webdriver');
//var options = {desiredCapabilities: {browserName: 'chrome'}};
var chrome = require('selenium-webdriver/chrome')
var Options = new chrome.Options();
Options.addArguments('headless')
Options.addArguments('disable-gpu')
var JASON = require('jason')
const test1 = require('./crawling_samename')
const test2 = require('./crawling_broadcasting')
const test3 = require('./crawling_br_onair_date')
const test4 = require('./crawling_time')
const {Builder, By, Key, until} = require('selenium-webdriver');
/**
 * Build a Chrome WebDriver session configured with the file-level `Options`
 * (which this file sets up with the 'headless' and 'disable-gpu' arguments).
 *
 * The builder chain is kept exactly as it was written inline; it is only
 * factored out so the four crawlers below do not each repeat it.
 *
 * @returns {*} whatever `webdriver.Builder#build()` returns (a driver session).
 */
function createDriver() {
  return new webdriver.Builder()
    .forBrowser('chrome')
    .withCapabilities(webdriver.Capabilities.chrome())
    .setChromeOptions(Options)
    .build();
}

// Each crawler is given its own browser session.
// NOTE(review): none of these sessions is quit in this script — confirm whether
// the crawler modules close their driver when they finish.

// Same-name people search.
// `samename_list` is now declared (it used to be an implicit global, which throws
// under strict mode); the dead `[]` initial assignment was dropped.
// NOTE(review): first_search appears to gather its results asynchronously, so the
// value captured here may be undefined — confirm against ./crawling_samename.
const driver = createDriver();
const samename_list = test1.first_search(driver, "유재석");

// Broadcasting list for a person's profile page.
const driver_1 = createDriver();
test2.search_broadcasting(driver_1, "https://people.search.naver.com/search.naver?where=nexearch&sm=tab_ppn&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&os=94702&ie=utf8&key=PeopleService");

// On-air status / date lookup for a programme title.
const driver_2 = createDriver();
test3.search_onairanddate(driver_2, "런닝맨");

// Broadcast time lookup: broadcaster, day of week, programme title.
const driver_3 = createDriver();
test4.search_broadcasting_time(driver_3, "SBS", "일요일", "런닝맨");
\ No newline at end of file