Toggle navigation
Toggle navigation
This project
Loading...
Sign in
김대선
/
Recruitment_Information_chatbot
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
김대선
2021-05-14 00:08:10 +0900
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
0b877cb4b3ca79618b8c97bd51d2b2574e801736
0b877cb4
1 parent
39abd470
프로그래머스 정보를 가져와서 분류 완료
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
76 additions
and
0 deletions
router/programmersFunction.js
router/programmersFunction.js
View file @
0b877cb
...
...
@@ -2,6 +2,82 @@ const puppeteer = require('puppeteer')
const
cheerio
=
require
(
'cheerio'
)
const
sanitizeHtml
=
require
(
'sanitize-html'
)
/**
 * Flattens one job-posting HTML fragment into a list of text fields plus its link.
 *
 * The first `<a ...>` tag in the fragment supplies the link: the text between its
 * first pair of double quotes is appended to "https://programmers.co.kr/job".
 * Every tag (and any literal backslash-t sequence) is then stripped, every space
 * character is removed, and the remainder is split on newlines with empty lines
 * dropped. The last remaining text line is discarded and the link takes its place.
 *
 * @param {string} string - HTML fragment that contains at least one `<a ...>` tag.
 * @returns {string[]} The non-empty text lines (without the last one) followed by the URL.
 * @throws {TypeError} When the fragment contains no `<a ...>` tag.
 */
const splitData = (string) => {
    const anchorMatch = /(<a([^>]+)>)/.exec(string);
    if (anchorMatch === null) {
        // Fail with a readable message instead of an opaque null dereference.
        throw new TypeError('splitData: no <a ...> tag found in the given HTML fragment');
    }

    // Index 1 of the quote-split tag is the first double-quoted attribute value.
    const quotedParts = anchorMatch[0].split('"');
    const url = "https://programmers.co.kr/job" + quotedParts[1];

    // Declared locally: the previous bare assignments created implicit globals
    // (and throw a ReferenceError in strict mode / ES modules).
    const result = string
        .replace(/(<([^>]+)>)*(\\t)?/gi, "")
        .replace(/ /g, "")
        .split("\n")
        .filter((line) => line !== '');

    // The trailing text line is dropped and replaced by the posting URL.
    result.pop();
    result.push(url);
    return result;
};
/**
 * Turns raw posting fragments into posting records.
 *
 * Each fragment is run through `splitData`; from the resulting field list the
 * record takes field 0 as `title`, field 2 as `term`, the last field as `url`,
 * and every field from index 6 up to (but excluding) the last one as `tags`.
 *
 * @param {Iterable<string>} array - Raw fragments, one per posting.
 * @returns {{title: *, term: *, tags: Array, url: *}[]} One record per fragment, in input order.
 */
const makeObject = (array) => {
    const postings = [];

    for (const fragment of array) {
        const fields = splitData(fragment);
        const lastIndex = fields.length - 1;

        postings.push({
            title: fields[0],
            term: fields[2],
            // Empty when the field list has no entries between index 6 and the URL.
            tags: fields.slice(6, lastIndex),
            url: fields[lastIndex],
        });
    }

    return postings;
};
/**
 * Tries to advance the listing to its next page and returns the resulting HTML.
 *
 * A failed click on the "next" link is ignored on purpose, so the function still
 * resolves with the page's current HTML after the 300 ms pause.
 *
 * @param {object} page - Object exposing `click`, `waitForTimeout` and `content`.
 * @returns {Promise<*>} Whatever `page.content()` resolves to after the pause.
 */
const moveNextPage = async (page) => {
    const clickAttempt = page.click('#paginate > nav > ul > li.next.next_page.page-item > a');
    // Rejections from the click are discarded rather than propagated.
    await clickAttempt.catch(() => {});

    await page.waitForTimeout(300);

    const html = await page.content();
    return html;
};
/**
 * Scrapes the Programmers job board page by page and returns all parsed postings.
 *
 * Starting from https://programmers.co.kr/job, up to 20 list items per page are
 * sanitized and handed to `makeObject`; `moveNextPage` is then asked for the next
 * page's HTML. Scraping stops as soon as that HTML equals the HTML of the page
 * just processed. The result is logged and returned.
 *
 * The browser is always closed, including when scraping fails part-way.
 *
 * @returns {Promise<Array>} All records produced by `makeObject`, in page order.
 */
const getData = async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        let result = [];
        let previousContent = "";

        await page.goto('https://programmers.co.kr/job');
        let content = await page.content();

        // Unchanged HTML after a "next" attempt means there was no further page.
        while (previousContent !== content) {
            const $ = cheerio.load(content, { decodeEntities: true });
            const resArr = [];

            for (let i = 1; i <= 20; i++) {
                // Declared locally; this used to be an implicit global.
                const selector = `#list-positions-wrapper > ul > li:nth-child(${i})`;
                const item = sanitizeHtml($(selector), { parser: { decodeEntities: true } });
                // An empty result means the page has fewer than `i` list items.
                if (item === '') break;
                // NOTE(review): the separator ends with a backtick, so it looks
                // unlikely to ever match — confirm whether "</div>" was intended.
                resArr.push(item.split("</div>`")[0]);
            }

            result = result.concat(await makeObject(resArr));
            previousContent = content;
            content = await moveNextPage(page);
        }

        console.log("finish", result.length);
        console.log(result);
        return result;
    } finally {
        // Without this the Chromium process outlives the scrape and keeps Node alive.
        await browser.close();
    }
};
// Runs a scrape as soon as this module is loaded. The returned promise is not
// awaited and has no rejection handler attached here.
getData
()
module
.
exports
=
{
...
...
Please
register
or
login
to post a comment