김승훈

update

......@@ -4,9 +4,12 @@ var options = {desiredCapabilities: {browserName: 'chrome'}};
var JASON = require('jason')
const {Builder, By, Key, until} = require('selenium-webdriver');
var iconv = require('iconv-lite')
const fs = require('fs')
datalist = []
module.exports = {
search_onairanddate: function(driver,string) {
search_onairanddate: function(driver,string,callback) {
let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=" + string
driver
.get(url1).then(() => {
......@@ -21,41 +24,31 @@ module.exports = {
.then(isbroad => {
isbroad.getText()
.then(isbroad_string => {
console.log(isbroad_string)
broadcasting=isbroad_string.substring(0,4)
isbroad = isbroad_string.substring(18,21)
br_date = isbroad_string.substring(23,24)
br_date = br_date + "요일"
br_time = isbroad_string.substring(26,34)
console.log(broadcasting,isbroad,br_date,br_time)
if(isbroad == "방영중") {
driver.findElement(webdriver.By.id('nx_query')).clear().then(( )=> {
//var buf = iconv.encode("편성표", "euc-kr")
//console.log(buf)
//var encodestr='';
//for(var i=0; i<buf.length;i++) {
// encodestr+='%'+buf[i].toString('16')
// }
//encodestr = encodestr.toUpperCase();
//console.log(iconv.encode(encodestr, 'EUC-KR').toString())
//console.log(encodestr)
//var buf = iconv.encode('편성표','euckr')
//var param = buf.toString('binary')
//console.log(param)
//driver.findElement(webdriver.By.id('nx_query')).sendKeys(broadcasting,br_date,param)
//driver.findElement(webdriver.By.className('spnew ico_search')).click()
})
}
else {
}
datalist.push(broadcasting,isbroad,br_date,br_time)
fs.writeFileSync("./log/"+string+"_br_isonair.txt", '\ufeff' + datalist, {encoding: 'utf8'});
callback()
})
})
})
})
})
})
driver.findElement(webdriver.By.className('brcs_thumb'))
.then(brcs_thumb => {
brcs_thumb.findElements(webdriver.By.tagName('img'))
.then( img => {
img[0].getAttribute('src')
.then(img_src => {
datalist.push(img_src)
})
})
})
}
}
\ No newline at end of file
......
......@@ -2,16 +2,34 @@ var async = require('async');
var webdriver = require('selenium-webdriver');
var options = {desiredCapabilities: {browserName: 'chrome'}};
const {Builder, By, Key, until} = require('selenium-webdriver');
const fs = require('fs')
const screen = {
width: 640,
height: 480
};
var a=0
var breaktheloop = false
var a=8000
var img_src_list = []
var text_list = []
var testStartRunTime = 0;
var testruntime = 0;
var time = 0;
module.exports = {
search_broadcasting : function(driver,url) {
search_broadcasting :function(driver,url,name,callback) {
/**
 * Two-phase stopwatch backed by the shared `testStartRunTime` variable.
 *
 * When no measurement is in progress (`testStartRunTime` is 0) the call
 * records the current time and returns 0. The next call returns the time
 * elapsed since that start, in seconds, and resets the stopwatch.
 *
 * @returns {number} 0 when a measurement was started, otherwise elapsed seconds.
 */
function testRunTimer() {
    // Current time in milliseconds since 1970-01-01.
    const nowMs = new Date().getTime();

    if (testStartRunTime == 0) {
        // Start phase: remember when the measurement began.
        testStartRunTime = nowMs;
        return 0;
    }

    // Stop phase: report the elapsed time and clear the start marker.
    const elapsedSeconds = (nowMs - testStartRunTime) / 1000;
    testStartRunTime = 0;
    return elapsedSeconds;
}
driver
.get(url)
.get("http://"+url)
.then(() => {
driver.findElement(webdriver.By.id('pagination_76'))
.then ( paginationBtn => {
......@@ -19,56 +37,95 @@ module.exports = {
.then(Btn_next => {
driver.findElement(webdriver.By.id('listUI_76'))
.then(contentsUI => {
contentsUI.findElements(webdriver.By.tagName('li'))
.then(elems => {
elems.forEach(elem => {
elem.getText()
.then(text => {
console.log(text)
clickandget(function() {
Btn_next[0].click()
setTimeout(() => {
clickandget(function() {
Btn_next[0].click()
setTimeout(() => {
clickandget(function() {
Btn_next[0].click()
setTimeout(() => {
clickandget(function(){
Btn_next[0].click()
setTimeout(() => {
clickandget(function(){
fs.writeFileSync("./log/"+name+"_img_src_br.txt", '\ufeff' + img_src_list, {encoding: 'utf8'});
fs.writeFileSync("./log/"+name+"_title_br.txt", '\ufeff' + text_list, {encoding: 'utf8'});
setTimeout(() => {
callback()
}, 500);
})
}, testruntime*1000+800);
})
}, testruntime*1000+800);
},testruntime*1000+800)
}, testruntime*1000+800);
})
})
}, testruntime*1000+800);
})
clickandget()
function clickandget() {
function clickandget(_callback) {
testRunTimer()
Btn_next[0].isDisplayed().then(function(state) {
if(state) {
Btn_next[0].click()
sleep(500).then(()=> {
contentsUI.findElements(webdriver.By.tagName('li'))
.then(elems => {
elems.forEach(elem => {
elem.getText()
.then(text => {
console.log(text)
contentsUI.findElements(webdriver.By.tagName('li'))
.then(elems => {
elems.forEach(elem => {
elem.findElements(webdriver.By.tagName('img'))
.then(img => {
img[0].getAttribute('src')
.then(img_src => {
elem.findElements(webdriver.By.className('tit'))
.then (titles => {
titles.forEach(title => {
title.getText()
.then(text => {
text_list.push(text)
img_src_list.push(img_src)
if(text_list.length % 5 == 0) {
var testruntime = testRunTimer();
time = time+testruntime*1000
_callback()
}
})
})
})
})
})
})
})
}
else {
console.log(state)
fs.writeFileSync("./log/"+name+"_img_src_br.txt", '\ufeff' + img_src_list, {encoding: 'utf8'});
fs.writeFileSync("./log/"+name+"_title_br.txt", '\ufeff' + text_list, {encoding: 'utf8'});
setTimeout(() => {
callback()
}, 500);
}
},function(err) {
fs.writeFileSync("./log/"+name+"_img_src_br.txt", '\ufeff' + img_src_list, {encoding: 'utf8'});
fs.writeFileSync("./log/"+name+"_title_br.txt", '\ufeff' + text_list, {encoding: 'utf8'});
setTimeout(() => {
callback()
}, 500);
})
}
})
})
})
})
// Unfinished helper: looks up the element with id 'listUI_76' and fetches its
// <li> children, but does nothing with them yet.
// NOTE(review): `callback` is accepted but never invoked, no rejection handler is
// attached to the promise chain, and nothing in the visible source calls this function.
function getContentsandClickNext(callback) {
driver.findElement(webdriver.By.id('listUI_76'))
.then(contentsUI => {
contentsUI.findElements(webdriver.By.tagName('li'))
.then(elems => {
elems.forEach(elem => {
// NOTE(review): bare property read with no effect; looks like a truncated
// `elem.getText()` call. Confirm the intended behavior before using this helper.
elem.getTex
})
})
})
}
/**
 * Promise-based delay.
 *
 * @param {number} time - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Resolves with no value once the timer fires.
 */
function sleep (time) {
    return new Promise(function (done) {
        setTimeout(done, time);
    });
}
}
}
\ No newline at end of file
}
//var driver = new webdriver.Builder()
//.forBrowser('chrome')
//.withCapabilities(webdriver.Capabilities.chrome())
//.build();
//search_broadcasting(driver,"people.search.naver.com/search.naver?where=nexearch&sm=tab_ppn&query=유재석&os=94702&ie=utf8&key=PeopleService","유재석",function() {
// console.log("B")
//})
\ No newline at end of file
......
var async = require('async');
var webdriver = require('selenium-webdriver');
//var options = {desiredCapabilities: {browserName: 'chrome'}};
var chrome = require('selenium-webdriver/chrome')
var Options = new chrome.Options();
Options.addArguments('headless')
Options.addArguments('disable-gpu')
//var JASON = require('jason')
//var util = require('util')
//var EventEmitter = require('events')
const fs = require('fs')
var check = 0
var testStartRunTime = 0;
var testruntime = 0;
......@@ -18,9 +15,10 @@ var next_link_list = []
var src_list = []
var text_list = []
var return_list = []
module.exports = {
first_search : function(driver,string) {
first_search :function (driver,string,callback) {
function testRunTimer() {
var today = new Date(); // 현재시간 얻기
var runTime = today.getTime(); // 밀리세컨드 ( 1970/01/01 부터 현재까지의 시간을 밀리세컨드로 나타냄 )
......@@ -52,32 +50,21 @@ module.exports = {
imgs.forEach(img => {
img.getAttribute('src')
.then(src => {
profile.findElements(webdriver.By.className('who'))
.then(whos => {
whos.forEach(who => {
who.getText()
.then(text => {
next_link_list.push(next_link)
src_list.push(src)
text_list.push(text)
testruntime += testRunTimer()
console.log(testruntime)
setTimeout(function() {
check++;
if(check==1) {
return_list.push(next_link_list)
return_list.push(src_list)
return_list.push(text_list)
console.log(testruntime)
setTimeout(function() {
console.log(return_list)
return return_list
},500)
}
},testruntime+1000)
})
})
})
next_link_list.push(next_link)
src_list.push(src)
//text_list.push(text)
testruntime += testRunTimer()
setTimeout(function() {
check++;
if(check==1) {
//return_list.push(text_list)
setTimeout(function() {
fs.writeFileSync("./log/"+string+"_next_link.txt", '\ufeff' + next_link_list, {encoding: 'utf8'});
fs.writeFileSync("./log/"+string+"_img_src.txt", '\ufeff' + src_list, {encoding: 'utf8'});
callback()
},500)
}
},testruntime*1000+1000)
})
})
})
......@@ -88,5 +75,4 @@ module.exports = {
})
})
}
}
}
\ No newline at end of file
......
var async = require('async');
var webdriver = require('selenium-webdriver');
var options = {desiredCapabilities: {browserName: 'chrome'}};
var JASON = require('jason')
const {Builder, By, Key, until} = require('selenium-webdriver');
var iconv = require('iconv-lite')
const fs = require('fs')
var arr = []
var result_arr = []
var temp = []
var i=0
var check = 0
module.exports = {
search_broadcasting_time : function(driver,broadcast,day,findtitle) {
search_broadcasting_time : function(driver,broadcast,day,findtitle, callback) {
let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=0&ie=utf8&query="+broadcast+"+"+day+"+편성표"
driver
.get(url1).then(() => {
......@@ -22,7 +23,29 @@ module.exports = {
tr.getText()
.then(content => {
if(content.indexOf(findtitle) != -1) {
console.log(content)
arr = content.toString().split('\n')
str(function() {
i++
if(i==2) {
fs.writeFileSync("./log/"+findtitle+"_time_detail.txt", '\ufeff' + result_arr, {encoding: 'utf8'});
callback()
}
})
/**
 * Turns the lines held in the shared `arr` into "<arr[0]> <first two chars>분"
 * plus "<rest of line>" pairs, stores them in `temp` starting at index `i`,
 * and appends them to `result_arr`.
 *
 * arr[1] is always processed; arr[2] is processed only when `arr` has exactly
 * three entries, in which case the shared counter `i` is also incremented.
 * `_callback` is invoked with no arguments 500 ms later.
 * (Presumably arr[0] is an hour label and the other entries are "MM title"
 * lines from the schedule table; confirm against the page markup.)
 *
 * @param {Function} _callback - Called once, after the 500 ms delay.
 */
function str(_callback) {
    const base = i;
    const prefix = arr[0] + " ";

    const firstLine = arr[1];
    temp[base] = prefix + firstLine.substr(0, 2) + "분";
    temp[base + 1] = firstLine.substr(3);
    result_arr.push(temp[base], temp[base + 1]);

    if (arr.length == 3) {
        const secondLine = arr[2];
        temp[base + 2] = prefix + secondLine.substr(0, 2) + "분";
        temp[base + 3] = secondLine.substr(3);
        result_arr.push(temp[base + 2], temp[base + 3]);
        i++;
    }

    setTimeout(function () {
        _callback();
    }, 500);
}
}
})
})
......
......@@ -6,10 +6,16 @@ var Options = new chrome.Options();
Options.addArguments('headless')
Options.addArguments('disable-gpu')
var JASON = require('jason')
const test1 = require('./crawling_samename')
var test1 = require('./crawling_samename')
const fs = require('fs')
const test2 = require('./crawling_broadcasting')
const test3 = require('./crawling_br_onair_date')
const test4 = require('./crawling_time')
var util = require('util')
var EventEmitter = require('events').EventEmitter;
var next_link_Array = []
var img_src_Array = []
const {Builder, By, Key, until} = require('selenium-webdriver');
var driver = new webdriver.Builder()
......@@ -17,31 +23,143 @@ var driver = new webdriver.Builder()
.withCapabilities(webdriver.Capabilities.chrome())
.setChromeOptions(Options)
.build();
samename_list = []
samename_list = test1.first_search(driver,"유재석")
var driver_1 = new webdriver.Builder()
.forBrowser('chrome')
.withCapabilities(webdriver.Capabilities.chrome())
.setChromeOptions(Options)
.build();
test2.search_broadcasting(driver_1,"https://people.search.naver.com/search.naver?where=nexearch&sm=tab_ppn&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&os=94702&ie=utf8&key=PeopleService")
.build()
var driver_2 = new webdriver.Builder()
.forBrowser('chrome')
.withCapabilities(webdriver.Capabilities.chrome())
.setChromeOptions(Options)
.build();
test3.search_onairanddate(driver_2,"런닝맨")
var driver_3 = new webdriver.Builder()
.forBrowser('chrome')
.withCapabilities(webdriver.Capabilities.chrome())
.setChromeOptions(Options)
.build();
/**
 * First pipeline stage: resolves the profile links and image sources for `name`.
 *
 * On the 'input' event:
 *  - if ./log/<name>_next_link.txt already exists, the cached link and image
 *    lists are loaded from the log files;
 *  - otherwise test1.first_search crawls them (it writes the same log files),
 *    after which they are loaded and `br_search` is rebuilt for this name.
 * In both cases `driver` is quit and, 1000 ms later, 'click' is emitted on
 * `br_search` to start the next stage.
 *
 * Side effects: reassigns the shared `next_link_Array`, `img_src_Array` and
 * (crawl branch only) `br_search`. The cached branch does not create
 * `br_search` itself; it relies on an instance existing by the time the timer fires.
 *
 * @param {string} name - Person name used for the search and the log file names.
 */
var First_Search = function(name) {
    const nextLinkPath = "./log/" + name + "_next_link.txt";
    const imgSrcPath = "./log/" + name + "_img_src.txt";

    // Refresh the shared arrays from the comma-joined log files.
    const loadLists = function() {
        next_link_Array = fs.readFileSync(nextLinkPath).toString().split(',');
        img_src_Array = fs.readFileSync(imgSrcPath).toString().split(',');
    };

    // Hand over to the broadcasting stage after a 1 s pause.
    const startBroadcastSearch = function() {
        setTimeout(function() {
            br_search.emit('click');
        }, 1000);
    };

    this.on('input', function() {
        // Only the link file is checked; the image file is assumed to accompany it.
        if (fs.existsSync(nextLinkPath)) {
            loadLists();
            driver.quit();
            console.log(next_link_Array);
            console.log(img_src_Array);
            startBroadcastSearch();
            return;
        }

        test1.first_search(driver, name, function() {
            loadLists();
            // Build the next stage for the name actually searched (was hard-coded
            // to "유재석"), so it reads and writes the log files of this person.
            br_search = new broadcasting_search(next_link_Array, name, 0);
            console.log(next_link_Array);
            console.log(img_src_Array);
            driver.quit();
            startBroadcastSearch();
        });
    });
}
// Give First_Search the EventEmitter API (on/emit) that its constructor uses.
util.inherits(First_Search,EventEmitter);
// Start the pipeline for the hard-coded name. `first_search1` is assigned
// without a declaration, so it becomes an implicit global.
first_search1 = new First_Search("유재석")
// NOTE(review): EventEmitter#emit runs listeners synchronously, so the 'input'
// handler executes here, before the later stage constructors and instances
// further down this file have been assigned. The handler only touches them
// from deferred callbacks (setTimeout / crawl callback). Confirm that ordering
// is intentional.
first_search1.emit('input')
// Shared results of the broadcasting stage: image sources and programme titles.
var img_src_br_list = []
var title_list = []
/**
 * Second pipeline stage: collects the programme titles and thumbnails for `name`.
 *
 * On the 'click' event the cached log file ./log/<name>_img_src_br.txt is used
 * when present; otherwise test2.search_broadcasting is run against
 * next_link[num] with its first nine characters removed. Either way the shared
 * `img_src_br_list` and `title_list` are then reloaded from the log files,
 * `driver_1` is quit, both lists are logged and 'click_1' is emitted on `is_onair`.
 *
 * @param {string[]} next_link - Candidate profile links.
 * @param {string} name - Person name used for the log file names.
 * @param {number} num - Index into `next_link` to crawl.
 */
var broadcasting_search = function(next_link,name,num) {
    // Common tail of both branches: load results, release the browser, move on.
    const publishResults = () => {
        const imgBuffer = fs.readFileSync("./log/"+name+"_img_src_br.txt");
        img_src_br_list = imgBuffer.toString().split(',');
        const titleBuffer = fs.readFileSync("./log/"+name+"_title_br.txt");
        title_list = titleBuffer.toString().split(',');
        driver_1.quit();
        console.log(img_src_br_list);
        console.log(title_list);
        is_onair.emit('click_1');
    };

    this.on('click', () => {
        const isCached = fs.existsSync("./log/"+name+"_img_src_br.txt");
        if (!isCached) {
            const target = next_link[num].substr(9);
            test2.search_broadcasting(driver_1, target, name, () => {
                publishResults();
            });
            return;
        }
        publishResults();
    });
}
// Give broadcasting_search the EventEmitter API (on/emit) that its constructor uses.
util.inherits(broadcasting_search,EventEmitter);
// Default second-stage instance for the hard-coded name, using link index 0.
// Assigned without a declaration (implicit global); First_Search may replace it
// after a fresh crawl.
br_search = new broadcasting_search(next_link_Array,"유재석",0)
// Shared on-air record read from the <title>_br_isonair.txt log file
// (also an implicit global).
br_datalist = []
/**
 * Third pipeline stage: determines the on-air status of title_list[num].
 *
 * On the 'click_1' event the cached log file ./log/<title>_br_isonair.txt is
 * used when present; otherwise test3.search_onairanddate crawls it first.
 * The file is then split on commas into the shared `br_datalist`, the first
 * entry is trimmed, the list is logged and 'click_2' is emitted on
 * `Search_detail_time`.
 *
 * @param {number} num - Index into the shared `title_list`.
 */
var isonair = function(num) {
    // Common tail of both branches; title_list[num] is re-read at call time.
    const loadAndForward = () => {
        const raw = fs.readFileSync("./log/"+title_list[num]+"_br_isonair.txt");
        br_datalist = raw.toString().split(',');
        // trim() strips surrounding whitespace from the first field
        // (presumably the BOM the log writer prepends; confirm).
        br_datalist[0] = br_datalist[0].trim();
        console.log(br_datalist);
        Search_detail_time.emit('click_2');
    };

    this.on('click_1', () => {
        if (!fs.existsSync("./log/"+title_list[num]+"_br_isonair.txt")) {
            test3.search_onairanddate(driver_2, title_list[num], () => {
                loadAndForward();
            });
            return;
        }
        loadAndForward();
    });
}
// Give isonair the EventEmitter API (on/emit) that its constructor uses.
util.inherits(isonair,EventEmitter);
// Third-stage instance bound to the hard-coded index 11 of title_list
// (assigned without a declaration, so it is an implicit global).
is_onair = new isonair(11)
// Shared result of the final stage: entries read from <title>_time_detail.txt.
var detail_time_list = []
/**
 * Final pipeline stage: loads the detailed broadcast times of title_list[num].
 *
 * On the 'click_2' event nothing happens unless br_datalist[1] is "방영중".
 * Otherwise the cached log file ./log/<title>_time_detail.txt is used when
 * present, or test4.search_broadcasting_time crawls it first with
 * br_datalist[0] and br_datalist[2]. The file is then split on commas into the
 * shared `detail_time_list` and logged.
 *
 * @param {number} num - Index into the shared `title_list`.
 */
var search_detail_time = function(num) {
    // Common tail of both branches; title_list[num] is re-read at call time.
    const reportDetail = () => {
        const raw = fs.readFileSync("./log/"+title_list[num]+"_time_detail.txt");
        detail_time_list = raw.toString().split(',');
        console.log(detail_time_list);
    };

    this.on('click_2', () => {
        if (br_datalist[1] != "방영중") {
            return;
        }
        if (fs.existsSync("./log/"+title_list[num]+"_time_detail.txt")) {
            reportDetail();
            return;
        }
        test4.search_broadcasting_time(driver_3, br_datalist[0], br_datalist[2], title_list[num], () => {
            reportDetail();
        });
    });
}
test4.search_broadcasting_time(driver_3,"SBS","일요일","런닝맨")
\ No newline at end of file
// Give search_detail_time the EventEmitter API (on/emit) that its constructor uses.
util.inherits(search_detail_time,EventEmitter);
// Final-stage instance bound to the hard-coded index 11 of title_list
// (assigned without a declaration, so it is an implicit global).
Search_detail_time = new search_detail_time(11);
......
......@@ -493,6 +493,11 @@
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
},
"events": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/events/-/events-3.0.0.tgz",
"integrity": "sha512-Dc381HFWJzEOhQ+d8pkNon++bk9h6cdAoAj4iE6Q4y6xgTzySWXlKn05/TVNpjnfRqi/X0EpJEJohPjNI3zpVA=="
},
"extend": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
......@@ -556,6 +561,11 @@
"mime-types": "^2.1.12"
}
},
"fs": {
"version": "0.0.1-security",
"resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz",
"integrity": "sha1-invTcYa23d84E/I4WLV+yq9eQdQ="
},
"fs-constants": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
......@@ -1291,6 +1301,14 @@
}
}
},
"util": {
"version": "0.11.1",
"resolved": "https://registry.npmjs.org/util/-/util-0.11.1.tgz",
"integrity": "sha512-HShAsny+zS2TZfaXxD9tYj4HQGlBezXZMZuM/S5PKLLoZkShZiGk9o5CzukI1LVHZvjdvZ2Sj1aW/Ndn2NB/HQ==",
"requires": {
"inherits": "2.0.3"
}
},
"util-deprecate": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
......
......@@ -10,10 +10,13 @@
"license": "ISC",
"dependencies": {
"async": "^2.6.1",
"events": "^3.0.0",
"fs": "0.0.1-security",
"iconv-lite": "^0.4.24",
"jason": "^2.0.0",
"json": "^9.0.6",
"selenium-webdriver": "^3.6.0",
"util": "^0.11.1",
"webdriverio": "^4.14.0"
}
}
......