Toggle navigation
Toggle navigation
This project
Loading...
Sign in
김대선
/
Recruitment_Information_chatbot
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
김대선
2021-05-10 01:12:09 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
ba4d069f9d6bc420736e3ff2d4db645a6aa3bbe0
ba4d069f
1 parent
ae25b539
kakao 채용정보를 가져와서 분류하기 구현
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
86 additions
and
22 deletions
app.js
router/kakao.js
router/kakaoFunction.js
router/test.js
app.js
View file @
ba4d069
...
...
@@ -2,7 +2,7 @@ const express = require('express')
const
puppeteer
=
require
(
'puppeteer'
)
const
naverRouter
=
require
(
'./router/naver'
)
const
kakaoRouter
=
require
(
'./router/kakao'
)
const
app
=
express
();
...
...
@@ -13,6 +13,7 @@ const server = app.listen(3000,()=>{
})
app
.
use
(
'/naver'
,
naverRouter
)
app
.
use
(
'/kakao'
,
kakaoRouter
)
app
.
get
(
'/'
,
(
req
,
res
)
=>
{
res
.
send
(
"this is home!"
)
...
...
router/kakao.js
View file @
ba4d069
const
express
=
require
(
'express'
)
const
router
=
express
.
Router
()
const
puppeteer
=
require
(
'puppeteer'
)
const
cheerio
=
require
(
'cheerio'
)
const
sanitizeHtml
=
require
(
'sanitize-html'
)
const
kakaoFunction
=
require
(
'./kakaoFunction'
)
// Root route of this router: answers every GET with a fixed greeting string.
// Express passes `next` as the third argument; it is not used here.
const respondWithKakaoGreeting = (_req, res, _next) => {
    res.send('hello this is kakao');
};

router.get('/', respondWithKakaoGreeting);
/**
 * GET /recruitment: responds with whatever kakaoFunction.getData() resolves to.
 *
 * The handler is async, and Express 4 does not observe the promise an async
 * handler returns. A rejection from getData() is therefore caught here and
 * handed to `next`, so the application's error middleware can answer the
 * request instead of leaving it hanging with an unhandled rejection.
 */
router.get('/recruitment', async (req, res, next) => {
    try {
        res.send(await kakaoFunction.getData());
    } catch (err) {
        next(err);
    }
});
module
.
exports
=
router
\ No newline at end of file
...
...
router/kakaoFunction.js
0 → 100644
View file @
ba4d069
const
puppeteer
=
require
(
'puppeteer'
)
const
cheerio
=
require
(
'cheerio'
)
const
sanitizeHtml
=
require
(
'sanitize-html'
)
/**
 * Flatten one HTML fragment of a job posting into an array of text fields.
 *
 * Steps:
 *  1. Take the first `<a ...>` tag and use its first double-quoted attribute
 *     value as a path appended to https://careers.kakao.com.
 *  2. Strip every tag (and any literal backslash-t sequence) and every space
 *     character from the fragment, split the rest on newlines and discard
 *     empty pieces.
 *  3. Drop the last remaining text field and append the URL in its place.
 *
 * All intermediate values are local. Previously `url` and `result` were
 * assigned without a declaration, which throws in strict mode / ES modules
 * and otherwise shares state between calls through the global object.
 *
 * @param {string} string HTML fragment for a single list item.
 * @returns {string[]} Text fields followed by the posting URL as the last
 *   element. The URL is "" when the fragment has no `<a>` tag or the tag has
 *   no double-quoted attribute value (previously: a TypeError, or a URL
 *   ending in "undefined").
 */
const splitData = (string) => {
    const anchorMatch = /(<a([^>]+)>)/g.exec(string);
    const path = anchorMatch ? anchorMatch[0].split('"')[1] : undefined;
    const url = path === undefined ? "" : "https://careers.kakao.com" + path;

    const fields = string
        .replace(/(<([^>]+)>)*(\\t)?/gi, "")
        .replace(/ /g, "")
        .split("\n")
        .filter((ele) => ele !== '');

    // Original behaviour: the final text field is discarded before the URL is appended.
    fields.pop();
    fields.push(url);
    return fields;
};
/**
 * Convert raw posting fragments into posting objects.
 *
 * Each fragment is passed through splitData; from the resulting field list:
 *   - title: field at index 0
 *   - term:  field at index 2
 *   - tags:  every field whose first character is '#', in order
 *   - url:   the last field
 *
 * @param {Iterable<string>} array Fragments, one per posting.
 * @returns {Array<{title: *, term: *, tags: Array, url: *}>} One object per fragment, in input order.
 */
const makeObject = (array) => {
    const toPosting = (fragment) => {
        const fields = splitData(fragment);
        const hashTags = fields.filter((field) => field[0] == '#');
        return {
            title: fields[0],
            term: fields[2],
            tags: hashTags,
            url: fields[fields.length - 1]
        };
    };

    return [...array].map((fragment) => toPosting(fragment));
};
/**
 * Try to advance the job list by one page and return the page HTML afterwards.
 *
 * Clicks the 11th child link of the paging list (presumably the "next page"
 * control - TODO confirm against the live markup), waits 300 ms, then reads
 * the page content. A failed click is deliberately ignored: the HTML is
 * still returned, so a caller can detect "no further page" by seeing the
 * same content again.
 *
 * @param {object} page Page object providing click, waitForTimeout and content.
 * @returns {Promise<string>} Value resolved by page.content() after the click attempt.
 */
const moveNextPage = async (page) => {
    const nextPageSelector = '#mArticle > div > div.paging_list > span > a:nth-child(11) > span > span';

    const clickAttempt = page.click(nextPageSelector);
    // Best-effort: a rejected click (e.g. the element is absent) is swallowed on purpose.
    await clickAttempt.catch(() => {});

    await page.waitForTimeout(300);

    const html = await page.content();
    return html;
};
/**
 * Scrape the Kakao careers job list, page by page, and return all postings.
 *
 * Launches a Puppeteer browser, opens https://careers.kakao.com/jobs and, for
 * every page, sanitizes the `#mArticle > div > ul.list_jobs` markup, splits it
 * on `</li>` and converts the pieces with makeObject. moveNextPage is then
 * asked for the next page; the loop ends once it returns HTML identical to
 * the page just processed.
 *
 * The browser is always closed, on success and on failure. Previously it was
 * never closed, so every call left a browser process running.
 *
 * @returns {Promise<Array>} Concatenated makeObject results for every page visited.
 * @throws Whatever Puppeteer, cheerio, sanitize-html or the helpers throw; the
 *   browser is closed before the error propagates.
 */
const getData = async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        const selector = '#mArticle > div > ul.list_jobs';
        let result = [];
        let previousContent = "";

        await page.goto('https://careers.kakao.com/jobs');
        let content = await page.content();

        // Identical HTML after a "next page" attempt means there was nothing left to load.
        while (previousContent !== content) {
            const $ = cheerio.load(content, { decodeEntities: true });
            // NOTE(review): the cheerio selection itself (not a string) is handed to
            // sanitizeHtml, exactly as before - confirm this coerces to the list's HTML.
            const resultString = sanitizeHtml($(selector), {
                parser: { decodeEntities: true }
            });

            const resArr = resultString.split('</li>');
            // The piece after the final </li> is not a list item.
            resArr.pop();
            result = result.concat(makeObject(resArr));

            previousContent = content;
            content = await moveNextPage(page);
        }

        console.log("finish", result.length);
        console.log(result);
        return result;
    } finally {
        await browser.close();
    }
};
module
.
exports
=
{
getData
:
getData
}
router/test.js
deleted
100644 → 0
View file @
ae25b53
/**
 * Scratch script: flattens one Naver recruitment HTML fragment into text
 * fields and prints the result, tracing each intermediate value.
 *
 * splitData takes the first `<a ...>` tag, uses its first double-quoted
 * attribute value as a path under https://recruit.navercorp.com, strips all
 * tags (and literal backslash-t sequences), splits the remaining text on tab
 * characters, discards empty pieces, drops the last piece and appends the URL.
 *
 * `url` and the field list are now local; they used to be assigned without a
 * declaration, which throws in strict mode / ES modules.
 *
 * @param {string} string HTML fragment for a single posting.
 * @returns {string[]} Text fields followed by the URL. The URL is "" when no
 *   anchor or no quoted attribute value is found (previously: a TypeError, or
 *   a URL ending in "undefined").
 */
const splitData = (string) => {
    const anchorMatch = /(<a([^>]+)>)/g.exec(string);
    console.log(anchorMatch);

    const quotedParts = anchorMatch ? anchorMatch[0].split('"') : [];
    console.log(quotedParts);

    const url = quotedParts[1] === undefined ? "" : "https://recruit.navercorp.com" + quotedParts[1];
    console.log(url);

    const fields = string
        .replace(/(<([^>]+)>)*(\\t)?/gi, "")
        .split("\t")
        .filter((ele) => ele !== '');

    // Original behaviour: the final text piece is discarded before the URL is appended.
    fields.pop();
    fields.push(url);
    return fields;
};

// Sample fragment used to eyeball the parser output.
const string = `\t<a href="/naver/job/detail/developer?annoId=20002849&classId=&jobId=&entTypeCd=&searchTxt=&searchSysComCd=">\t\t<span>\t\t\t<strong>[Platform Engineering] Server Engineer</strong>\t\t\t<span>NEW</span>\t\t\t<em>2021.04.26~2021.05.10</em>\t\t\t<span></span>\t\t</span>\t</a>\t<span><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Platform%20Engineering">#Platform Engineering</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Software%20Development">#Software Development</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Server">#Server</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=%EA%B2%BD%EB%A0%A5">#경력</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Backend">#Backend</a>\t </span>`;

console.log(splitData(string));
\ No newline at end of file
Please
register
or
login
to post a comment