Toggle navigation
Toggle navigation
This project
Loading...
Sign in
강상위
/
my-broadcasting
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
김승훈
2018-12-07 03:20:22 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
8ec7e6bce17af192bb6d6e865554ce88fb6c2ae0
8ec7e6bc
1 parent
9ad7029f
add crawling_time,samename,more_data
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
183 additions
and
12 deletions
crawling_br_onair_date.js
crawling_samename.js
crawling_time.js
crawling_br_onair_date.js
0 → 100644
View file @
8ec7e6b
var
async
=
require
(
'async'
);
var
webdriver
=
require
(
'selenium-webdriver'
);
var
options
=
{
desiredCapabilities
:
{
browserName
:
'chrome'
}};
var
JASON
=
require
(
'jason'
)
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
var
iconv
=
require
(
'iconv-lite'
)
// Chrome session used for the whole script. It is intentionally left open at
// the end so the submitted search stays visible in the browser.
var driver = new webdriver.Builder()
  .withCapabilities(webdriver.Capabilities.chrome())
  .build();

// Naver search result page for the programme whose broadcast info is scraped.
let url1 = "https://search.naver.com/search.naver?sm=top_hty&fbm=1&ie=utf8&query=%EB%9F%B0%EB%8B%9D%EB%A7%A8";

// Flow:
//   1. open the result page and walk .brcs_detail -> dl -> dd -> .inline
//   2. slice broadcaster / status / weekday / time out of that element's text
//   3. if the status is "방영중", type "<broadcaster><weekday>요일편성표"
//      into the search box and click the search button
// Every step returns its promise so that commands run in order and any
// failure reaches the single catch at the end of the chain.
driver.get(url1)
  .then(() => driver.findElements(webdriver.By.className('brcs_detail')))
  .then(broad_info => {
    if (broad_info.length === 0) {
      // Without this guard broad_info[0] is undefined and the next call
      // throws an unhelpful TypeError.
      throw new Error("no element with class 'brcs_detail' on the result page");
    }
    return broad_info[0].findElement(webdriver.By.tagName('dl'));
  })
  .then(more_data => more_data.findElement(webdriver.By.tagName('dd')))
  .then(one_line => one_line.findElement(webdriver.By.className("inline")))
  .then(inline_elem => inline_elem.getText())
  .then(isbroad_string => {
    console.log(isbroad_string);

    // Fixed-offset slicing: assumes the text always has the same layout
    // (broadcaster in the first 4 characters, status at 18-20, weekday
    // letter at 23, time at 26-33) — TODO confirm against other programmes.
    const broadcasting = isbroad_string.substring(0, 4);
    const isbroad = isbroad_string.substring(18, 21);
    const br_date = isbroad_string.substring(23, 24) + "요일";
    const br_time = isbroad_string.substring(26, 34);
    console.log(broadcasting, isbroad, br_date, br_time);

    if (isbroad !== "방영중") {
      // Nothing to look up unless the programme is currently on air.
      return undefined;
    }

    // sendKeys types Unicode text, so the keyword is passed as-is. The
    // previous EUC-KR encode + toString('binary') round-trip produced six
    // Latin-1 characters instead of the Korean word.
    const keyword = '편성표';
    console.log(keyword);

    return driver.findElement(webdriver.By.id('nx_query')).clear()
      .then(() => driver.findElement(webdriver.By.id('nx_query'))
        .sendKeys(broadcasting, br_date, keyword))
      .then(() => driver.findElement(webdriver.By.className('spnew ico_search')).click());
  })
  .catch(error => {
    console.log('Exception', error);
  });
crawling_samename.js
View file @
8ec7e6b
...
...
@@ -4,6 +4,8 @@ var options = {desiredCapabilities: {browserName: 'chrome'}};
var
JASON
=
require
(
'jason'
)
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
var
ajason
=
new
Object
()
var
aname
=
[]
var
asrc
=
[]
...
...
@@ -11,7 +13,11 @@ var alink=[]
var
driver
=
new
webdriver
.
Builder
().
withCapabilities
(
webdriver
.
Capabilities
.
chrome
()).
build
();
let
url1
=
"https://people.search.naver.com/search.naver?sm=sbx_hty&where=nexearch&ie=utf8&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&x=0&y=0"
let
url1
=
"https://people.search.naver.com/search.naver?sm=tab_hty&where=nexearch&query=%EC%9C%A0%EC%9E%AC%EC%84%9D&ie=utf8&x=0&y=0"
var
driver_1
=
new
webdriver
.
Builder
().
withCapabilities
(
webdriver
.
Capabilities
.
chrome
()).
build
();
driver
.
get
(
url1
).
then
(()
=>
{
driver
...
...
@@ -25,21 +31,13 @@ driver
img_src
[
0
].
getAttribute
(
"src"
)
.
then
(
src
=>
{
if
(
src
)
{
asrc
.
push
(
src
)
}
else
{
asrc
.
push
(
"none"
)
}
asrc
.
push
(
src
)
console
.
log
(
src
)
}).
then
(()
=>
{
console
.
log
(
asrc
,
alink
,
aname
)
console
.
log
(
asrc
)
})
})
})
whoclass
.
findElements
(
webdriver
.
By
.
tagName
(
"strong"
))
.
then
(
name_temp
=>
{
name_temp
[
0
].
getText
().
then
(
name
=>
{
...
...
@@ -54,6 +52,86 @@ driver
findname
[
0
].
getAttribute
(
"href"
)
.
then
(
link
=>
{
alink
.
push
(
link
)
}).
then
(()
=>
{
driver_1
.
get
(
alink
[
0
]).
then
(()
=>
{
driver_1
.
findElement
(
webdriver
.
By
.
id
(
'pagination_76'
))
.
then
(
paginationBtn
=>
{
paginationBtn
.
findElements
(
webdriver
.
By
.
className
(
'bt_next'
))
.
then
(
elemsBtn
=>
{
var
cnt
=
1
;
/**
 * One iteration of the paging loop: logs the text of every <li> inside
 * #listUI_76, clicks the first "next" button, waits briefly, then signals
 * completion through `callback`.
 *
 * Uses `cnt`, `driver_1`, `elemsBtn`, `webdriver` and `sleep` from the
 * enclosing scope; `cnt` is incremented once per call.
 *
 * @param {function(Error=)} callback - invoked exactly once: with no argument
 *   after the click and the pause have finished, or with the error if any
 *   step failed, so the caller's loop can stop instead of stalling.
 */
function getContentsAndClickNext(callback) {
  console.log('higetcontests', cnt);
  cnt++;

  driver_1.findElement(webdriver.By.id('listUI_76'))
    .then(contentsUI => contentsUI.findElements(webdriver.By.tagName('li')))
    .then(elems => Promise.all(elems.map(elem =>
      elem.getText()
        .then(text => {
          console.log(text);
        })
        .catch(error => {
          // Best effort per item: report the failure and keep going.
          console.log('getText failed for a list item', error);
        })
    )))
    // Click only after every item has been read; clicking earlier replaces
    // the list while getText() calls are still pending, which makes the
    // element references stale.
    .then(() => elemsBtn[0].click())
    // Short pause after the click before the next iteration reads the list.
    .then(() => sleep(50))
    // Two-argument then() rather than then().catch(): an exception thrown by
    // callback() itself must not trigger a second callback invocation.
    .then(
      () => callback(),
      error => {
        console.log('getContentsAndClickNext failed', error);
        callback(error);
      }
    );
}
async
.
whilst
(
function
()
{
console
.
log
(
'whilist result'
,
cnt
<
5
);
return
cnt
<
5
;
},
getContentsAndClickNext
,
function
(
e
)
{
console
.
log
(
'Exception'
,
e
)
}
);
})
.
catch
(
error
=>
{
console
.
log
(
'Exception 4444'
,
error
);
});
// 다음 컨텐츠로 가는 버튼(2개임. 하나는 평소용 두번째는 더이상 갈 페이지가 없을 때 쓰는 버튼)
})
.
catch
(
error
=>
{
console
.
log
(
'Exception 555'
,
error
);
});
// 방송 컨텐츠 페이지네이션 버튼
// sleep time expects milliseconds
/**
 * Returns a promise that resolves, with no value, once `time` milliseconds
 * have elapsed.
 *
 * @param {number} time - delay in milliseconds, handed directly to setTimeout.
 * @returns {Promise<undefined>} resolves after the timer fires; never rejects.
 */
function sleep(time) {
  // `return` and the expression must share a line: a line break right after
  // `return` triggers automatic semicolon insertion and yields undefined.
  return new Promise((resolve) => setTimeout(resolve, time));
}
// Usage!
sleep
(
8000
).
then
(()
=>
{
// Do something after the sleep!
});
});
})
})
})
...
...
crawling_time.js
0 → 100644
View file @
8ec7e6b
var
async
=
require
(
'async'
);
var
webdriver
=
require
(
'selenium-webdriver'
);
var
options
=
{
desiredCapabilities
:
{
browserName
:
'chrome'
}};
var
JASON
=
require
(
'jason'
)
const
{
Builder
,
By
,
Key
,
until
}
=
require
(
'selenium-webdriver'
);
var
iconv
=
require
(
'iconv-lite'
)
// Programme title to look for in the schedule table.
var findtitle = "런닝맨";

// Chrome session used for the whole script.
var driver = new webdriver.Builder()
  .withCapabilities(webdriver.Capabilities.chrome())
  .build();

// Naver search result page that shows a broadcaster's daily schedule.
let url1 = "https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=SBS+%EC%9D%BC%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&oquery=SBS+%EC%9B%94%EC%9A%94%EC%9D%BC+%ED%8E%B8%EC%84%B1%ED%91%9C&tqi=UsDTSlpySD0ssv33OfVssssssXK-499490";

// Walk schedule area -> first <tbody> -> every <tr>, then print each row whose
// text contains `findtitle`. Each step returns its promise so a failure
// anywhere reaches the catch at the end instead of being lost.
driver.get(url1)
  .then(() => driver.findElements(
    webdriver.By.className('cont_inner type_day _scheduleArea')))
  .then(contentsearch_section => {
    if (contentsearch_section.length === 0) {
      throw new Error('schedule area not found on the result page');
    }
    return contentsearch_section[0].findElements(webdriver.By.tagName('tbody'));
  })
  .then(tbody => {
    if (tbody.length === 0) {
      throw new Error('schedule area contains no <tbody>');
    }
    return tbody[0].findElements(webdriver.By.tagName('tr'));
  })
  // Promise.all keeps the rows in document order.
  .then(trs => Promise.all(trs.map(tr => tr.getText())))
  .then(contents => {
    contents.forEach(content => {
      if (content.indexOf(findtitle) !== -1) {
        console.log(content);
      }
    });
  })
  .catch(error => {
    console.log('Exception', error);
  });
Please
register
or
login
to post a comment