Toggle navigation
Toggle navigation
This project
Loading...
Sign in
강상위
/
my-broadcasting
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
김승훈
2018-12-08 18:28:39 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
02bc4f8d31d95863ff1e73cbd98e3fbfbd945c5a
02bc4f8d
1 parent
59284c9a
update main
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
245 additions
and
248 deletions
crawling_br_onair_date.js
crawling_broadcasting.js
crawling_samename.js
crawling_time.js
main.js
crawling_br_onair_date.js
View file @
02bc4f8
...
...
@@ -5,56 +5,57 @@ var JASON = require('jason')
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
var
iconv
=
require
(
'iconv-lite'
)
var
driver
=
new
webdriver
.
Builder
().
withCapabilities
(
webdriver
.
Capabilities
.
chrome
()).
build
();
let
url1
=
"https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=%EB%9F%B0%EB%8B%9D%EB%A7%A8"
driver
.
get
(
url1
).
then
(()
=>
{
module
.
exports
=
{
search_onairanddate
:
function
(
driver
,
string
)
{
let
url1
=
"https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query="
+
string
driver
.
findElements
(
webdriver
.
By
.
className
(
'brcs_detail'
))
.
then
(
broad_info
=>
{
broad_info
[
0
].
findElement
(
webdriver
.
By
.
tagName
(
'dl'
))
.
then
(
more_data
=>
{
more_data
.
findElement
(
webdriver
.
By
.
tagName
(
'dd'
))
.
then
(
one_line
=>
{
one_line
.
findElement
(
webdriver
.
By
.
className
(
"inline"
))
.
then
(
isbroad
=>
{
isbroad
.
getText
()
.
then
(
isbroad_string
=>
{
console
.
log
(
isbroad_string
)
broadcasting
=
isbroad_string
.
substring
(
0
,
4
)
isbroad
=
isbroad_string
.
substring
(
18
,
21
)
br_date
=
isbroad_string
.
substring
(
23
,
24
)
br_date
=
br_date
+
"요일"
br_time
=
isbroad_string
.
substring
(
26
,
34
)
.
get
(
url1
).
then
(()
=>
{
driver
.
findElements
(
webdriver
.
By
.
className
(
'brcs_detail'
))
.
then
(
broad_info
=>
{
broad_info
[
0
].
findElement
(
webdriver
.
By
.
tagName
(
'dl'
))
.
then
(
more_data
=>
{
more_data
.
findElement
(
webdriver
.
By
.
tagName
(
'dd'
))
.
then
(
one_line
=>
{
one_line
.
findElement
(
webdriver
.
By
.
className
(
"inline"
))
.
then
(
isbroad
=>
{
isbroad
.
getText
()
.
then
(
isbroad_string
=>
{
console
.
log
(
isbroad_string
)
broadcasting
=
isbroad_string
.
substring
(
0
,
4
)
isbroad
=
isbroad_string
.
substring
(
18
,
21
)
br_date
=
isbroad_string
.
substring
(
23
,
24
)
br_date
=
br_date
+
"요일"
br_time
=
isbroad_string
.
substring
(
26
,
34
)
console
.
log
(
broadcasting
,
isbroad
,
br_date
,
br_time
)
if
(
isbroad
==
"방영중"
)
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'nx_query'
)).
clear
().
then
((
)
=>
{
//var buf = iconv.encode("편성표", "euc-kr")
//console.log(buf)
//var encodestr='';
//for(var i=0; i<buf.length;i++) {
// encodestr+='%'+buf[i].toString('16')
// }
//encodestr = encodestr.toUpperCase();
//console.log(iconv.encode(encodestr, 'EUC-KR').toString())
//console.log(encodestr)
var
buf
=
iconv
.
encode
(
'편성표'
,
'euckr'
)
var
param
=
buf
.
toString
(
'binary'
)
console
.
log
(
param
)
driver
.
findElement
(
webdriver
.
By
.
id
(
'nx_query'
)).
sendKeys
(
broadcasting
,
br_date
,
param
)
driver
.
findElement
(
webdriver
.
By
.
className
(
'spnew ico_search'
)).
click
()
})
}
else
{
console
.
log
(
broadcasting
,
isbroad
,
br_date
,
br_time
)
if
(
isbroad
==
"방영중"
)
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'nx_query'
)).
clear
().
then
((
)
=>
{
//var buf = iconv.encode("편성표", "euc-kr")
//console.log(buf)
//var encodestr='';
//for(var i=0; i<buf.length;i++) {
// encodestr+='%'+buf[i].toString('16')
// }
//encodestr = encodestr.toUpperCase();
//console.log(iconv.encode(encodestr, 'EUC-KR').toString())
//console.log(encodestr)
//var buf = iconv.encode('편성표','euckr')
//var param = buf.toString('binary')
//console.log(param)
//driver.findElement(webdriver.By.id('nx_query')).sendKeys(broadcasting,br_date,param)
//driver.findElement(webdriver.By.className('spnew ico_search')).click()
})
}
}
else
{
}
})
})
})
})
})
})
})
}
}
\ No newline at end of file
...
...
crawling_broadcasting.js
View file @
02bc4f8
...
...
@@ -8,70 +8,67 @@ const screen = {
};
var
a
=
0
var
breaktheloop
=
false
var
driver
=
new
webdriver
.
Builder
().
withCapabilities
(
webdriver
.
Capabilities
.
chrome
()).
build
();
let
url
=
'https://people.search.naver.com/search.naver?where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&sm=tab_etc&ie=utf8&key=PeopleService&os=94702'
;
driver
.
get
(
url
)
.
then
(()
=>
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'pagination_76'
))
.
then
(
paginationBtn
=>
{
paginationBtn
.
findElements
(
webdriver
.
By
.
className
(
'bt_next'
))
.
then
(
Btn_next
=>
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'listUI_76'
))
.
then
(
contentsUI
=>
{
contentsUI
.
findElements
(
webdriver
.
By
.
tagName
(
'li'
))
.
then
(
elems
=>
{
elems
.
forEach
(
elem
=>
{
elem
.
getText
()
.
then
(
text
=>
{
console
.
log
(
text
)
module
.
exports
=
{
search_broadcasting
:
function
(
driver
,
url
)
{
driver
.
get
(
url
)
.
then
(()
=>
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'pagination_76'
))
.
then
(
paginationBtn
=>
{
paginationBtn
.
findElements
(
webdriver
.
By
.
className
(
'bt_next'
))
.
then
(
Btn_next
=>
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'listUI_76'
))
.
then
(
contentsUI
=>
{
contentsUI
.
findElements
(
webdriver
.
By
.
tagName
(
'li'
))
.
then
(
elems
=>
{
elems
.
forEach
(
elem
=>
{
elem
.
getText
()
.
then
(
text
=>
{
console
.
log
(
text
)
})
})
})
}
)
clickandget
()
function
clickandget
(
)
{
Btn_next
[
0
].
isDisplayed
().
then
(
function
(
state
)
{
if
(
state
)
{
Btn_next
[
0
].
click
()
sleep
(
500
).
then
(()
=>
{
contentsUI
.
findElements
(
webdriver
.
By
.
tagName
(
'li'
))
.
then
(
elems
=>
{
elems
.
forEach
(
elem
=>
{
elem
.
getText
()
.
then
(
text
=>
{
console
.
log
(
text
)
clickandget
(
)
function
clickandget
()
{
Btn_next
[
0
].
isDisplayed
().
then
(
function
(
state
)
{
if
(
state
)
{
Btn_next
[
0
].
click
()
sleep
(
500
).
then
(()
=>
{
contentsUI
.
findElements
(
webdriver
.
By
.
tagName
(
'li'
))
.
then
(
elems
=>
{
elems
.
forEach
(
elem
=>
{
elem
.
getText
()
.
then
(
text
=>
{
console
.
log
(
text
)
}
)
})
})
})
}
)
}
else
{
console
.
log
(
state
)
}
}
)
}
}
else
{
console
.
log
(
state
)
}
}
)
}
}
)
})
})
})
})
function
getContentsandClickNext
(
callback
)
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'listUI_76'
))
.
then
(
contentsUI
=>
{
contentsUI
.
findElements
(
webdriver
.
By
.
tagName
(
'li'
))
.
then
(
elems
=>
{
elems
.
forEach
(
elem
=>
{
elem
.
getTex
function
getContentsandClickNext
(
callback
)
{
driver
.
findElement
(
webdriver
.
By
.
id
(
'listUI_76'
))
.
then
(
contentsUI
=>
{
contentsUI
.
findElements
(
webdriver
.
By
.
tagName
(
'li'
))
.
then
(
elems
=>
{
elems
.
forEach
(
elem
=>
{
elem
.
getTex
})
})
})
})
}
}
function
sleep
(
time
)
{
return
new
Promise
((
resolve
)
=>
setTimeout
(
resolve
,
time
));
}
\ No newline at end of file
function
sleep
(
time
)
{
return
new
Promise
((
resolve
)
=>
setTimeout
(
resolve
,
time
));
}
}
}
\ No newline at end of file
...
...
crawling_samename.js
View file @
02bc4f8
var
async
=
require
(
'async'
);
var
webdriver
=
require
(
'selenium-webdriver'
);
var
options
=
{
desiredCapabilities
:
{
browserName
:
'chrome'
}};
var
JASON
=
require
(
'jason'
)
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
//var options = {desiredCapabilities: {browserName: 'chrome'}};
var
chrome
=
require
(
'selenium-webdriver/chrome'
)
var
Options
=
new
chrome
.
Options
();
Options
.
addArguments
(
'headless'
)
Options
.
addArguments
(
'disable-gpu'
)
//var JASON = require('jason')
//var util = require('util')
//var EventEmitter = require('events')
var
check
=
0
var
testStartRunTime
=
0
;
var
testruntime
=
0
;
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
var
ajason
=
new
Object
()
var
aname
=
[]
var
asrc
=
[]
var
alink
=
[]
var
driver
=
new
webdriver
.
Builder
().
withCapabilities
(
webdriver
.
Capabilities
.
chrome
()).
build
();
let
url1
=
"https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&ie=utf8&x=0&y=0"
var
driver_1
=
new
webdriver
.
Builder
().
withCapabilities
(
webdriver
.
Capabilities
.
chrome
()).
build
();
driver
.
get
(
url1
).
then
(()
=>
{
driver
.
findElements
(
webdriver
.
By
.
className
(
'result_profile'
))
.
then
(
whoclasses
=>
{
whoclasses
.
forEach
(
whoclass
=>
{
whoclass
.
findElements
(
webdriver
.
By
.
className
(
'thmb'
))
.
then
(
img_thmbs
=>
{
img_thmbs
.
forEach
(
img_thmb
=>
{
img_thmb
.
findElements
(
webdriver
.
By
.
className
(
'thmb_img'
)).
then
(
img_src
=>
{
img_src
[
0
].
getAttribute
(
"src"
)
.
then
(
src
=>
{
asrc
.
push
(
src
)
console
.
log
(
src
)
}).
then
(()
=>
{
console
.
log
(
asrc
)
})
})
})
whoclass
.
findElements
(
webdriver
.
By
.
tagName
(
"strong"
))
.
then
(
name_temp
=>
{
name_temp
[
0
].
getText
().
then
(
name
=>
{
aname
.
push
(
name
)
})
})
})
whoclass
.
findElements
(
webdriver
.
By
.
className
(
'who'
))
.
then
(
who
=>
{
who
[
0
].
findElements
(
webdriver
.
By
.
className
(
'name'
))
.
then
(
findname
=>
{
findname
[
0
].
getAttribute
(
"href"
)
.
then
(
link
=>
{
alink
.
push
(
link
)
}).
then
(()
=>
{
driver_1
.
get
(
alink
[
0
]).
then
(()
=>
{
driver_1
.
findElement
(
webdriver
.
By
.
id
(
'pagination_76'
))
.
then
(
paginationBtn
=>
{
paginationBtn
.
findElements
(
webdriver
.
By
.
className
(
'bt_next'
))
.
then
(
elemsBtn
=>
{
var
cnt
=
1
;
function
getContentsAndClickNext
(
callback
)
{
console
.
log
(
'higetcontests'
,
cnt
);
cnt
++
;
driver_1
.
findElement
(
webdriver
.
By
.
id
(
'listUI_76'
))
.
then
(
contentsUI
=>
{
contentsUI
.
findElements
(
webdriver
.
By
.
tagName
(
'li'
))
.
then
(
elems
=>
{
elems
.
forEach
(
elem
=>
{
elem
.
getText
()
.
then
(
text
=>
{
console
.
log
(
text
);
// 내 추측 : stale해지면, boolean 값으로 driver 어딘가에 true설정되는것 같아
// wait이라는 함수를 써서, 이게 false 가 될 때까지 기다린 다음에 getText()를 해.
// ok?
})
.
catch
(
error
=>
{
// 문제가 있으면 이곳을 의심해볼것! 변수 error를 체크해보기!
// console.log('really?? exception!');
})
})
elemsBtn
[
0
].
click
();
sleep
(
50
).
then
(()
=>
{
// Do something after the sleep!
});
callback
();
var
next_link_list
=
[]
var
src_list
=
[]
var
text_list
=
[]
var
return_list
=
[]
module
.
exports
=
{
first_search
:
function
(
driver
,
string
)
{
function
testRunTimer
()
{
var
today
=
new
Date
();
// 현재시간 얻기
var
runTime
=
today
.
getTime
();
// 밀리세컨드 ( 1970/01/01 부터 현재까지의 시간을 밀리세컨드로 나타냄 )
var
rtn
=
0
;
if
(
testStartRunTime
==
0
)
{
testStartRunTime
=
runTime
;
}
else
{
rtn
=
(
runTime
-
testStartRunTime
)
/
1000
;
testStartRunTime
=
0
;
}
return
rtn
;
}
let
url1
=
"https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query="
+
string
+
"&ie=utf8&x=0&y=0"
driver
.
get
(
url1
).
then
(()
=>
{
testRunTimer
()
driver
.
findElements
(
webdriver
.
By
.
className
(
'result_profile'
))
.
then
(
profiles
=>
{
profiles
.
forEach
(
profile
=>
{
profile
.
findElements
(
webdriver
.
By
.
className
(
'thmb'
))
.
then
(
links
=>
{
links
.
forEach
(
link
=>
{
link
.
getAttribute
(
'href'
)
.
then
(
next_link
=>
{
profile
.
findElements
(
webdriver
.
By
.
className
(
'thmb_img'
))
.
then
((
imgs
)
=>
{
imgs
.
forEach
(
img
=>
{
img
.
getAttribute
(
'src'
)
.
then
(
src
=>
{
profile
.
findElements
(
webdriver
.
By
.
className
(
'who'
))
.
then
(
whos
=>
{
whos
.
forEach
(
who
=>
{
who
.
getText
()
.
then
(
text
=>
{
next_link_list
.
push
(
next_link
)
src_list
.
push
(
src
)
text_list
.
push
(
text
)
testruntime
+=
testRunTimer
()
console
.
log
(
testruntime
)
setTimeout
(
function
()
{
check
++
;
if
(
check
==
1
)
{
return_list
.
push
(
next_link_list
)
return_list
.
push
(
src_list
)
return_list
.
push
(
text_list
)
console
.
log
(
testruntime
)
setTimeout
(
function
()
{
console
.
log
(
return_list
)
return
return_list
},
500
)
}
},
testruntime
+
1000
)
})
.
catch
(
error
=>
{
console
.
log
(
"asdfasdf!!', e"
);
})
})
.
catch
(
error
=>
{
console
.
log
(
'fuck!@#'
,
error
);
})
})
}
async
.
whilst
(
function
()
{
console
.
log
(
'whilist result'
,
cnt
<
5
);
return
cnt
<
5
;
},
getContentsAndClickNext
,
function
(
e
)
{
console
.
log
(
'Exception'
,
e
)
}
);
})
})
})
.
catch
(
error
=>
{
console
.
log
(
'Exception 4444'
,
error
);
});
// 다음 컨텐츠로 가는 버튼(2개임. 하나는 평소용 두번째는 더이상 갈 페이지가 없을 때 쓰는 버튼)
})
.
catch
(
error
=>
{
console
.
log
(
'Exception 555'
,
error
);
});
// 방송 컨텐츠 페이지네이션 버튼
// sleep time expects milliseconds
function
sleep
(
time
)
{
return
new
Promise
((
resolve
)
=>
setTimeout
(
resolve
,
time
));
}
// Usage!
sleep
(
8000
).
then
(()
=>
{
// Do something after the sleep!
});
});
})
})
})
})
})
}
}
})
})
\ No newline at end of file
...
...
crawling_time.js
View file @
02bc4f8
...
...
@@ -5,29 +5,30 @@ var JASON = require('jason')
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
var
iconv
=
require
(
'iconv-lite'
)
var
findtitle
=
"런닝맨"
var
driver
=
new
webdriver
.
Builder
().
withCapabilities
(
webdriver
.
Capabilities
.
chrome
()).
build
();
let
url1
=
"https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=SBS+%EC%9D%BC%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&oquery=SBS+%EC%9B%94%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&tqi=UsDTSlpySD0ssv33OfVssssssXK-499490"
driver
.
get
(
url1
).
then
(()
=>
{
module
.
exports
=
{
search_broadcasting_time
:
function
(
driver
,
broadcast
,
day
,
findtitle
)
{
let
url1
=
"https://search.naver.com/search.naver?sm=top_hty&fbm=0&ie=utf8&query="
+
broadcast
+
"+"
+
day
+
"+편성표"
driver
.
findElements
(
webdriver
.
By
.
className
(
'cont_inner type_day _scheduleArea'
))
.
then
(
contentsearch_section
=>
{
contentsearch_section
[
0
].
findElements
(
webdriver
.
By
.
tagName
(
'tbody'
))
.
then
(
tbody
=>
{
tbody
[
0
].
findElements
(
webdriver
.
By
.
tagName
(
'tr'
))
.
then
(
trs
=>
{
trs
.
forEach
(
tr
=>
{
tr
.
getText
()
.
then
(
content
=>
{
if
(
content
.
indexOf
(
findtitle
)
!=
-
1
)
{
console
.
log
(
content
)
}
.
get
(
url1
).
then
(()
=>
{
driver
.
findElements
(
webdriver
.
By
.
className
(
'cont_inner type_day _scheduleArea'
))
.
then
(
contentsearch_section
=>
{
contentsearch_section
[
0
].
findElements
(
webdriver
.
By
.
tagName
(
'tbody'
))
.
then
(
tbody
=>
{
tbody
[
0
].
findElements
(
webdriver
.
By
.
tagName
(
'tr'
))
.
then
(
trs
=>
{
trs
.
forEach
(
tr
=>
{
tr
.
getText
()
.
then
(
content
=>
{
if
(
content
.
indexOf
(
findtitle
)
!=
-
1
)
{
console
.
log
(
content
)
}
})
})
})
})
})
})
})
}
}
\ No newline at end of file
...
...
main.js
0 → 100644
View file @
02bc4f8
var
async
=
require
(
'async'
);
var
webdriver
=
require
(
'selenium-webdriver'
);
//var options = {desiredCapabilities: {browserName: 'chrome'}};
var
chrome
=
require
(
'selenium-webdriver/chrome'
)
var
Options
=
new
chrome
.
Options
();
Options
.
addArguments
(
'headless'
)
Options
.
addArguments
(
'disable-gpu'
)
var
JASON
=
require
(
'jason'
)
const
test1
=
require
(
'./crawling_samename'
)
const
test2
=
require
(
'./crawling_broadcasting'
)
const
test3
=
require
(
'./crawling_br_onair_date'
)
const
test4
=
require
(
'./crawling_time'
)
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
var
driver
=
new
webdriver
.
Builder
()
.
forBrowser
(
'chrome'
)
.
withCapabilities
(
webdriver
.
Capabilities
.
chrome
())
.
setChromeOptions
(
Options
)
.
build
();
samename_list
=
[]
samename_list
=
test1
.
first_search
(
driver
,
"유재석"
)
var
driver_1
=
new
webdriver
.
Builder
()
.
forBrowser
(
'chrome'
)
.
withCapabilities
(
webdriver
.
Capabilities
.
chrome
())
.
setChromeOptions
(
Options
)
.
build
();
test2
.
search_broadcasting
(
driver_1
,
"https://people.search.naver.com/search.naver?where=nexearch&sm=tab_ppn&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&os=94702&ie=utf8&key=PeopleService"
)
var
driver_2
=
new
webdriver
.
Builder
()
.
forBrowser
(
'chrome'
)
.
withCapabilities
(
webdriver
.
Capabilities
.
chrome
())
.
setChromeOptions
(
Options
)
.
build
();
test3
.
search_onairanddate
(
driver_2
,
"런닝맨"
)
var
driver_3
=
new
webdriver
.
Builder
()
.
forBrowser
(
'chrome'
)
.
withCapabilities
(
webdriver
.
Capabilities
.
chrome
())
.
setChromeOptions
(
Options
)
.
build
();
test4
.
search_broadcasting_time
(
driver_3
,
"SBS"
,
"일요일"
,
"런닝맨"
)
\ No newline at end of file
Please
register
or
login
to post a comment