Showing
4 changed files
with
85 additions
and
21 deletions
... | @@ -2,7 +2,7 @@ const express = require('express') | ... | @@ -2,7 +2,7 @@ const express = require('express') |
2 | const puppeteer = require('puppeteer') | 2 | const puppeteer = require('puppeteer') |
3 | 3 | ||
4 | const naverRouter = require('./router/naver') | 4 | const naverRouter = require('./router/naver') |
5 | - | 5 | +const kakaoRouter = require('./router/kakao') |
6 | 6 | ||
7 | 7 | ||
8 | const app = express(); | 8 | const app = express(); |
... | @@ -13,6 +13,7 @@ const server = app.listen(3000,()=>{ | ... | @@ -13,6 +13,7 @@ const server = app.listen(3000,()=>{ |
13 | }) | 13 | }) |
14 | 14 | ||
15 | app.use('/naver', naverRouter) | 15 | app.use('/naver', naverRouter) |
16 | +app.use('/kakao', kakaoRouter) | ||
16 | 17 | ||
17 | app.get('/', (req, res)=>{ | 18 | app.get('/', (req, res)=>{ |
18 | res.send("this is home!") | 19 | res.send("this is home!") | ... | ... |
1 | const express = require('express') | 1 | const express = require('express') |
2 | const router = express.Router() | 2 | const router = express.Router() |
3 | 3 | ||
4 | -const puppeteer = require('puppeteer') | 4 | +const kakaoFunction = require('./kakaoFunction') |
5 | -const cheerio = require('cheerio') | ||
6 | -const sanitizeHtml =require('sanitize-html') | ||
7 | 5 | ||
8 | router.get('/', (req, res, error)=>{ | 6 | router.get('/', (req, res, error)=>{ |
9 | res.send('hello this is kakao') | 7 | res.send('hello this is kakao') |
10 | }) | 8 | }) |
11 | 9 | ||
12 | router.get('/recruitment', async (req, res, error)=>{ | 10 | router.get('/recruitment', async (req, res, error)=>{ |
13 | - res.send('kakao recruitment') | 11 | + res.send(await kakaoFunction.getData()) |
14 | }) | 12 | }) |
13 | + | ||
14 | +module.exports = router | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
router/kakaoFunction.js
0 → 100644
1 | +const puppeteer = require('puppeteer') | ||
2 | +const cheerio = require('cheerio') | ||
3 | +const sanitizeHtml =require('sanitize-html') | ||
4 | + | ||
/**
 * Flattens one job-listing HTML fragment into an array of its text fields,
 * with the absolute posting URL appended as the last element.
 *
 * Steps, in order:
 *   1. Locate the first tag that starts with `<a` and take the first
 *      double-quoted value inside it as the relative link
 *      (assumes `href` is the first quoted attribute — TODO confirm against
 *      the sanitized markup).
 *   2. Strip every HTML tag (and literal backslash-t sequences), remove all
 *      space characters, split on newlines and drop empty lines.
 *   3. Discard the last remaining text field and push the URL in its place.
 *
 * @param {string} string - HTML fragment for a single listing item.
 * @returns {string[]} Text fields followed by the absolute posting URL.
 * @throws {TypeError} If the fragment contains no `<a ...>` tag.
 */
const splitData = (string) => {
    const anchorMatch = /<a[^>]+>/.exec(string);
    if (anchorMatch === null) {
        // Previously this surfaced as "Cannot read properties of null";
        // fail with a message that names the actual problem instead.
        throw new TypeError('splitData: no <a ...> tag found in listing fragment');
    }

    // Splitting on '"' puts the first quoted attribute value at index 1.
    const quotedParts = anchorMatch[0].split('"');
    const url = "https://careers.kakao.com" + quotedParts[1];

    // `url` and `result` are now block-scoped; the original assigned them
    // without a declaration, leaking globals (and throwing in strict mode).
    const result = string
        .replace(/(<([^>]+)>)*(\\t)?/gi, "")
        .replace(/ /g, "")
        .split("\n")
        .filter((ele) => ele !== '');

    // The trailing text field is dropped before the URL is appended.
    result.pop();
    result.push(url);
    return result;
};
14 | + | ||
/**
 * Builds one job-posting object per listing fragment.
 *
 * Each fragment is flattened with `splitData`; the resulting fields are
 * mapped as follows:
 *   - title: field 0
 *   - term:  field 2
 *   - tags:  every field whose first character is '#'
 *   - url:   the last field
 *
 * @param {Iterable<string>} array - HTML fragments, one per listing item.
 * @returns {{title: string, term: string, tags: string[], url: string}[]}
 *          Postings in the same order as the input fragments.
 */
const makeObject = (array) => {
    const postings = [];
    for (const fragment of array) {
        const fields = splitData(fragment);
        postings.push({
            title : fields[0],
            term : fields[2],
            tags : fields.filter((field) => field[0] === '#'),
            url : fields[fields.length - 1]
        });
    }
    return postings;
};
34 | + | ||
/**
 * Clicks the pagination control, waits briefly, and returns the page HTML.
 *
 * The click is best-effort: if it fails, the error is ignored and the current
 * (unchanged) HTML is returned. `getData` stops paginating when two
 * consecutive snapshots are identical, so a failed click is how the crawl
 * ends (presumably the control is absent on the last page — TODO confirm).
 *
 * @param {object} page - Puppeteer page (needs `click` and `content`).
 * @param {number} [delayMs=300] - Milliseconds to wait after the click so the
 *        page can re-render before the HTML is read.
 * @returns {Promise<string>} The page HTML after the click attempt.
 */
const moveNextPage = async (page, delayMs = 300) => {
    // Presumably the "next" control of the pager — verify against the site.
    const nextSelector = '#mArticle > div > div.paging_list > span > a:nth-child(11) > span > span';

    try {
        await page.click(nextSelector);
    } catch (error) {
        // Deliberately ignored: see the termination note in the doc comment.
    }

    // Timer-based wait instead of page.waitForTimeout, which newer Puppeteer
    // releases no longer provide.
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    return page.content();
};
42 | + | ||
/**
 * Crawls https://careers.kakao.com/jobs page by page and returns every
 * posting found in the `ul.list_jobs` element.
 *
 * For each page snapshot the job list is sanitized, split on `</li>` into
 * per-item fragments, and converted with `makeObject`. Pagination continues
 * via `moveNextPage` until two consecutive snapshots are identical.
 *
 * The headless browser is always closed, including when navigation or
 * parsing throws, so repeated calls do not accumulate browser processes.
 *
 * @returns {Promise<{title: string, term: string, tags: string[], url: string}[]>}
 *          All postings collected across pages.
 */
const getData = async () => {
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        const selector = '#mArticle > div > ul.list_jobs';

        let result = [];
        let previousContent = "";

        await page.goto('https://careers.kakao.com/jobs');
        let content = await page.content();

        // Stop once a "next page" attempt leaves the HTML unchanged.
        while (previousContent !== content) {
            const $ = cheerio.load(content, {decodeEntities: true});

            const resultString = sanitizeHtml($(selector), {
                parser : {
                    decodeEntities: true
                }
            });

            // Declared locally; the original leaked `resArr` as a global.
            const resArr = resultString.split('</li>');
            // The piece after the final '</li>' is not a listing item.
            resArr.pop();

            result = result.concat(makeObject(resArr));
            previousContent = content;
            content = await moveNextPage(page);
        }

        console.log("finish", result.length);
        console.log(result);
        return result;
    } finally {
        // Release the browser process on success and on failure alike.
        await browser.close();
    }
};
75 | + | ||
// Public surface of this module: only the crawler entry point is exported.
module.exports = { getData };
79 | + |
router/test.js
deleted
100644 → 0
1 | -const splitData = (string)=>{ | ||
2 | - const temp = /(<a([^>]+)>)/g.exec(string) | ||
3 | - console.log(temp) | ||
4 | - const temp2 = temp[0].split('"'); | ||
5 | - console.log(temp2) | ||
6 | - url = "https://recruit.navercorp.com" + temp2[1]; | ||
7 | - console.log(url) | ||
8 | - result = string.replace(/(<([^>]+)>)*(\\t)?/gi, "").split("\t").filter((ele)=> ele != '') | ||
9 | - result.pop() | ||
10 | - result.push(url) | ||
11 | - return result; | ||
12 | -} | ||
13 | - | ||
14 | -const string = `\t<a href="/naver/job/detail/developer?annoId=20002849&classId=&jobId=&entTypeCd=&searchTxt=&searchSysComCd=">\t\t<span>\t\t\t<strong>[Platform Engineering] Server Engineer</strong>\t\t\t<span>NEW</span>\t\t\t<em>2021.04.26~2021.05.10</em>\t\t\t<span></span>\t\t</span>\t</a>\t<span><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Platform%20Engineering">#Platform Engineering</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Software%20Development">#Software Development</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Server">#Server</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=%EA%B2%BD%EB%A0%A5">#경력</a><a href="/naver/job/list/developer?entTypeCd=&searchSysComCd=&searchTxt=Backend">#Backend</a>\t </span>` | ||
15 | - | ||
16 | -console.log(splitData(string)) | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment