김대선

kakao 채용정보를 가져와서 분류하기 구현

...@@ -2,7 +2,7 @@ const express = require('express') ...@@ -2,7 +2,7 @@ const express = require('express')
2 const puppeteer = require('puppeteer') 2 const puppeteer = require('puppeteer')
3 3
4 const naverRouter = require('./router/naver') 4 const naverRouter = require('./router/naver')
5 - 5 +const kakaoRouter = require('./router/kakao')
6 6
7 7
8 const app = express(); 8 const app = express();
...@@ -13,6 +13,7 @@ const server = app.listen(3000,()=>{ ...@@ -13,6 +13,7 @@ const server = app.listen(3000,()=>{
13 }) 13 })
14 14
15 app.use('/naver', naverRouter) 15 app.use('/naver', naverRouter)
16 +app.use('/kakao', kakaoRouter)
16 17
17 app.get('/', (req, res)=>{ 18 app.get('/', (req, res)=>{
18 res.send("this is home!") 19 res.send("this is home!")
......
1 const express = require('express') 1 const express = require('express')
2 const router = express.Router() 2 const router = express.Router()
3 3
4 -const puppeteer = require('puppeteer') 4 +const kakaoFunction = require('./kakaoFunction')
5 -const cheerio = require('cheerio')
6 -const sanitizeHtml =require('sanitize-html')
7 5
// Root route of this router: always answers with a fixed greeting string.
router.get('/', function (req, res) {
    res.send('hello this is kakao')
})
11 9
// GET /recruitment (relative to wherever this router is mounted).
// Responds with whatever kakaoFunction.getData() resolves to.
router.get('/recruitment', async (req, res, next) => {
    try {
        res.send(await kakaoFunction.getData())
    } catch (err) {
        // A rejected promise inside an async handler is not routed to the
        // error middleware automatically, so hand the failure over explicitly
        // instead of leaving the request hanging.
        next(err)
    }
})
13 +
14 +module.exports = router
...\ No newline at end of file ...\ No newline at end of file
......
1 +const puppeteer = require('puppeteer')
2 +const cheerio = require('cheerio')
3 +const sanitizeHtml =require('sanitize-html')
4 +
/**
 * Flatten the sanitized HTML of one job-list item into an array of text fields.
 *
 * The first double-quoted attribute value of the first `<a ...>` tag is taken as
 * the posting path and appended to the Kakao careers host. All tags and all
 * space characters are then stripped and the remaining text is split on "\n".
 *
 * @param {string} string - HTML fragment for a single list item.
 * @returns {string[]} The non-empty text lines except the last one, followed by
 *   the absolute posting URL as the final element.
 * @throws {TypeError} If the fragment contains no `<a ...>` tag.
 */
const splitData = (string) => {
    const anchorMatch = /(<a([^>]+)>)/.exec(string);
    if (anchorMatch === null) {
        throw new TypeError('splitData: no <a> tag found in job item markup');
    }

    // Splitting the opening tag on '"' puts the first quoted attribute value at index 1.
    const path = anchorMatch[0].split('"')[1];
    const url = "https://careers.kakao.com" + path;

    const result = string
        .replace(/(<([^>]+)>)*(\\t)?/gi, "") // strip tags (and literal "\t" sequences)
        .replace(/ /g, "")                   // drop every space, including those inside words
        .split("\n")
        .filter((ele) => ele !== '');

    // The last text line is discarded and replaced by the URL.
    // NOTE(review): presumably that line is trailing non-field text — confirm against live markup.
    result.pop();
    result.push(url);
    return result;
};
14 +
/**
 * Build one posting object per job-item HTML fragment.
 *
 * Each fragment is passed through splitData; the resulting fields are mapped as
 * title = field 0, term = field 2, url = last field, and tags = every field
 * whose first character is '#'.
 *
 * @param {string[]} array - HTML fragments, one per job-list item.
 * @returns {{title: string, term: string, tags: string[], url: string}[]}
 */
const makeObject = (array) => {
    return array.map((fragment) => {
        const fields = splitData(fragment)
        const lastIndex = fields.length - 1
        return {
            title : fields[0],
            term : fields[2],
            tags : fields.filter((field) => field[0] === '#'),
            url : fields[lastIndex]
        }
    })
}
34 +
/**
 * Click the pager's "next" control, give the page a moment to update, and
 * return the page's current HTML.
 *
 * A failed click is tolerated on purpose: the page is then left untouched, so
 * the returned HTML equals the previous snapshot.
 *
 * @param {object} page - Puppeteer page (only `click` and `content` are used).
 * @param {number} [delayMs=300] - Milliseconds to wait after the click attempt.
 * @returns {Promise<string>} HTML of the page after the click attempt.
 */
const moveNextPage = async (page, delayMs = 300) => {
    const nextButton = '#mArticle > div > div.paging_list > span > a:nth-child(11) > span > span';

    try {
        await page.click(nextButton);
    } catch (error) {
        // Best effort: when the control cannot be clicked the content stays as it
        // was, and the caller sees an unchanged snapshot.
    }

    // Plain timer instead of page.waitForTimeout, which newer Puppeteer releases removed.
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    return page.content();
};
42 +
/**
 * Scrape the Kakao careers job list page by page and return every posting.
 *
 * Launches a headless browser, opens https://careers.kakao.com/jobs, converts
 * the `ul.list_jobs` markup of each page into posting objects via makeObject,
 * and advances with moveNextPage until two consecutive page snapshots are
 * identical.
 *
 * @returns {Promise<object[]>} Posting objects as produced by makeObject.
 */
const getData = async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();

        let result = [];
        let previousContent = "";

        await page.goto('https://careers.kakao.com/jobs');
        let content = await page.content();

        // moveNextPage returns the same HTML when it could not advance, so an
        // unchanged snapshot means the previous page was the last one.
        while (previousContent !== content) {
            const $ = cheerio.load(content, {decodeEntities: true});

            const selector = '#mArticle > div > ul.list_jobs';

            // Serialize the selection explicitly rather than relying on implicit
            // string coercion of the cheerio object inside sanitize-html.
            const resultString = sanitizeHtml(String($(selector)), {
                parser : {
                    decodeEntities: true
                }
            });

            const resArr = resultString.split('</li>');
            // The piece after the final '</li>' is not a list item.
            resArr.pop();

            result = result.concat(makeObject(resArr));
            previousContent = content;
            content = await moveNextPage(page);
        }

        console.log("finish", result.length);
        console.log(result);
        return result;
    } finally {
        // Always shut the browser down, even when scraping fails, so repeated
        // calls do not pile up browser processes.
        await browser.close();
    }
};
75 +
// Public surface of this module: only the scraper entry point is exported.
module.exports = { getData }
79 +
1 -const splitData = (string)=>{
2 - const temp = /(<a([^>]+)>)/g.exec(string)
3 - console.log(temp)
4 - const temp2 = temp[0].split('"');
5 - console.log(temp2)
6 - url = "https://recruit.navercorp.com" + temp2[1];
7 - console.log(url)
8 - result = string.replace(/(<([^>]+)>)*(\\t)?/gi, "").split("\t").filter((ele)=> ele != '')
9 - result.pop()
10 - result.push(url)
11 - return result;
12 -}
13 -
14 -const string = `\t<a href="/naver/job/detail/developer?annoId=20002849&amp;classId=&amp;jobId=&amp;entTypeCd=&amp;searchTxt=&amp;searchSysComCd=">\t\t<span>\t\t\t<strong>[Platform Engineering] Server Engineer</strong>\t\t\t<span>NEW</span>\t\t\t<em>2021.04.26~2021.05.10</em>\t\t\t<span></span>\t\t</span>\t</a>\t<span><a href="/naver/job/list/developer?entTypeCd=&amp;searchSysComCd=&amp;searchTxt=Platform%20Engineering">#Platform Engineering</a><a href="/naver/job/list/developer?entTypeCd=&amp;searchSysComCd=&amp;searchTxt=Software%20Development">#Software Development</a><a href="/naver/job/list/developer?entTypeCd=&amp;searchSysComCd=&amp;searchTxt=Server">#Server</a><a href="/naver/job/list/developer?entTypeCd=&amp;searchSysComCd=&amp;searchTxt=%EA%B2%BD%EB%A0%A5">#경력</a><a href="/naver/job/list/developer?entTypeCd=&amp;searchSysComCd=&amp;searchTxt=Backend">#Backend</a>\t </span>`
15 -
16 -console.log(splitData(string))
...\ No newline at end of file ...\ No newline at end of file