cheerio, sanitize-html 설치 및 네이버 채용정보 데이터를 받아와서, 항목별로 나누기까지 구현
Showing
1 changed file
with
59 additions
and
0 deletions
router/naver.js
0 → 100644
1 | +const express = require('express') | ||
2 | +const router = express.Router() | ||
3 | + | ||
4 | +const puppeteer = require('puppeteer') | ||
5 | +const cheerio = require('cheerio') | ||
6 | +const sanitizeHtml =require('sanitize-html') | ||
7 | + | ||
/**
 * Breaks one HTML fragment into its text fields.
 *
 * Every HTML tag and every literal backslash-t sequence (the two characters
 * "\" and "t") is removed, the remainder is split on real tab characters, and
 * each piece is trimmed. Pieces that are empty or whitespace-only are dropped,
 * so stray newlines between tabs do not show up as fields.
 *
 * @param {string} string - HTML fragment to split.
 * @returns {string[]} Trimmed, non-empty fields in their original order; an
 *   empty array when the input is not a string.
 */
const splitData = (string) => {
  if (typeof string !== 'string') {
    return [];
  }

  return string
    .replace(/<[^>]+>|\\t/g, '')
    .split('\t')
    .map((field) => field.trim())
    .filter((field) => field !== '');
};
11 | + | ||
/**
 * Runs every entry of the given collection through splitData and collects the
 * results in the same order.
 *
 * @param {Iterable<string>} array - Fragments to split, one per entry.
 * @returns {Array} One splitData result per input entry.
 */
const makeObject = (array) => Array.from(array, (fragment) => splitData(fragment));
20 | + | ||
// Root route of this router: answers with a fixed plain-text marker.
router.get('/', function naverHome(req, res, next) {
  res.send("naver home");
});
24 | + | ||
/**
 * GET /recruitment
 *
 * Opens the Naver developer job list in a Puppeteer-controlled browser, keeps
 * clicking the "more" button until the page HTML stops changing, then
 * sanitizes every listing <li> and responds with one makeObject entry per
 * listing.
 *
 * Errors are forwarded to Express through next(err), and the browser is always
 * closed so a request cannot leave a browser process behind.
 */
router.get('/recruitment', async (req, res, next) => {
  const listUrl = 'https://recruit.navercorp.com/naver/job/list/developer';
  const moreButtonSelector = '#moreDiv > button';
  const selector = '#jobListDiv > ul > li';
  const settleDelayMs = 300;
  // Upper bound on expansion rounds so a page whose HTML never settles cannot
  // keep the request (and the browser) alive forever.
  const maxRounds = 200;

  let browser;
  try {
    browser = await puppeteer.launch();
    const page = await browser.newPage();
    await page.goto(listUrl);

    // Expand the list: stop as soon as a click-and-wait round leaves the
    // page HTML unchanged.
    let content = await page.content();
    let previous = null;
    for (let round = 0; round < maxRounds && previous !== content; round += 1) {
      previous = await page.content();
      try {
        await page.click(moreButtonSelector);
      } catch (clickError) {
        // Best effort: the click rejects when the button cannot be found or
        // clicked (presumably once every listing is shown). The unchanged
        // HTML then ends the loop on the next check.
      }
      // Plain timer rather than page.waitForTimeout, which newer Puppeteer
      // releases no longer provide.
      await new Promise((resolve) => setTimeout(resolve, settleDelayMs));
      content = await page.content();
    }

    const $ = cheerio.load(content, { decodeEntities: true });

    // Sanitize each <li> on its own instead of splitting one big string on
    // '</li><li>', so whitespace between items cannot merge two listings.
    const fragments = $(selector)
      .toArray()
      .map((item) => sanitizeHtml($.html(item), {
        parser: {
          decodeEntities: true,
        },
      }));

    const result = makeObject(fragments);
    console.log(result);
    res.send(result);
  } catch (err) {
    // Hand the failure to Express so the client gets an error response
    // instead of a request that never completes.
    next(err);
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        console.error('Failed to close the Puppeteer browser', closeError);
      }
    }
  }
});
57 | + | ||
58 | + | ||
59 | +module.exports = router | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment