김대선

kakao 채용정보 가져오는 기능 개발이 끝나서 master에 병합

1 const express = require('express') 1 const express = require('express')
2 -const puppeteer = require('puppeteer') 2 +
3 +const naverRouter = require('./router/naver.js')
4 +
3 5
4 const naverRouter = require('./router/naver') 6 const naverRouter = require('./router/naver')
5 const kakaoRouter = require('./router/kakao') 7 const kakaoRouter = require('./router/kakao')
6 8
7 9
8 const app = express(); 10 const app = express();
11 +
9 const server = app.listen(3000,()=>{ 12 const server = app.listen(3000,()=>{
10 const host = server.address().address 13 const host = server.address().address
11 const port = server.address().port 14 const port = server.address().port
......
1 const express = require('express') 1 const express = require('express')
2 const router = express.Router() 2 const router = express.Router()
3 3
4 -const puppeteer = require('puppeteer') 4 +const naverFunction = require('./naverFunctions')
5 -const cheerio = require('cheerio')
6 -const sanitizeHtml =require('sanitize-html')
7 -
8 -const splitData = (string)=>{
9 - const temp = /(<a([^>]+)>)/g.exec(string)
10 - const temp2 = temp[0].split('"');
11 - url = "https://recruit.navercorp.com" + temp2[1];
12 - result = string.replace(/(<([^>]+)>)*(\\t)?/gi, "").split("\t").filter((ele)=> ele != '')
13 - result.pop()
14 - result.push(url)
15 - return result;
16 -}
17 -
18 -const makeObject = (array)=>{
19 - const result = []
20 - let tempData = null
21 - for(let i of array){
22 - tempData = splitData(i)
23 - if(tempData.length == 5){
24 - result.push({
25 - title : tempData[0],
26 - term : tempData[2],
27 - tags : tempData[3].split('#').filter((ele)=>ele != ''),
28 - url : tempData[4]
29 - })
30 - }
31 - else if (tempData.length == 4){
32 - result.push({
33 - title : tempData[0],
34 - term : tempData[2],
35 - url : tempData[3]
36 - })
37 - }
38 - }
39 - return result
40 -}
41 -
42 -const getData = async ()=>{
43 - const browser = await puppeteer.launch();
44 - const page = await browser.newPage();
45 -
46 - await page.goto('https://recruit.navercorp.com/naver/job/list/developer')
47 - let content = await page.content()
48 - let temp = null;
49 - while(true){
50 - if (temp == content) break;
51 - else{
52 - temp = await page.content()
53 - await page.click('#moreDiv > button').catch((error)=>{
54 -
55 - })
56 - await page.waitForTimeout(200)
57 - content = await page.content()
58 - }
59 - }
60 - let $ = cheerio.load(content, {decodeEntities: true})
61 -
62 - const selector = '#jobListDiv > ul > li'
63 -
64 - let result = sanitizeHtml($(selector), {
65 - parser : {
66 - decodeEntities: true
67 - }
68 - })
69 - resArr = result.split('</li><li>')
70 - result = makeObject(resArr)
71 - console.log(result)
72 - return result;
73 -}
74 5
75 router.get('/', (req, res, error)=>{ 6 router.get('/', (req, res, error)=>{
76 res.send("naver home") 7 res.send("naver home")
77 }) 8 })
78 9
79 router.get('/recruitment', async (req, res, error)=>{ 10 router.get('/recruitment', async (req, res, error)=>{
80 - res.send(await getData()) 11 + res.send(await naverFunction.getData())
81 }) 12 })
82 13
83 14
......
1 +const puppeteer = require('puppeteer')
2 +const cheerio = require('cheerio')
3 +const sanitizeHtml =require('sanitize-html')
4 +
/**
 * Breaks one sanitized `<li>` fragment of the job list into its text fields.
 *
 * Markup tags and literal "\t" character pairs are stripped, the remaining text
 * is split on tab characters, empty pieces are dropped, the last piece is
 * discarded and the absolute posting URL is appended in its place.
 *
 * @param {string} string - HTML fragment describing a single job posting.
 * @returns {string[]} The text fields followed by the posting URL, or an empty
 *   array when the fragment contains no `<a ...>` tag with a double-quoted
 *   attribute value (e.g. the empty string produced by an empty job list).
 */
const splitData = (string)=>{
    // The first double-quoted attribute value of the first <a ...> tag is taken
    // as the link path (presumably the href — verify against the page markup).
    const anchor = /<a([^>]+)>/.exec(string)
    if (anchor === null) return []

    const path = anchor[0].split('"')[1]
    if (path === undefined) return []

    const url = `https://recruit.navercorp.com${path}`

    // Declared locally: the previous undeclared assignments leaked globals and
    // throw a ReferenceError under strict mode / ES modules.
    const fields = string
        .replace(/(<([^>]+)>)*(\\t)?/gi, "")
        .split("\t")
        .filter((ele)=> ele !== '')
    fields.pop()
    fields.push(url)
    return fields
}
14 +
/**
 * Converts raw job-list fragments into posting objects.
 *
 * Each fragment is run through `splitData`. A five-field result yields
 * `{ title, term, tags, url }` (tags come from splitting the fourth field on
 * '#'); a four-field result yields `{ title, term, url }`. Results of any
 * other length are skipped.
 *
 * @param {Iterable<string>} array - HTML fragments, one per job posting.
 * @returns {Object[]} Posting objects in the order of the input fragments.
 */
const makeObject = (array)=>{
    const postings = []
    for(const fragment of array){
        const fields = splitData(fragment)
        const hasTags = fields.length === 5
        if(!hasTags && fields.length !== 4) continue

        // The second field (index 1) is intentionally not carried over.
        const posting = {
            title : fields[0],
            term : fields[2]
        }
        if(hasTags){
            posting.tags = fields[3].split('#').filter((tag)=> tag !== '')
        }
        // The URL is always the last field, whichever shape was matched.
        posting.url = fields[fields.length - 1]
        postings.push(posting)
    }
    return postings
}
38 +
/**
 * Scrapes the NAVER developer job list and returns it as posting objects.
 *
 * Launches a browser with puppeteer, opens the job list page, keeps clicking
 * the `#moreDiv > button` element until the page HTML stops changing, then
 * sanitizes the `#jobListDiv > ul > li` items and passes them to `makeObject`.
 * The browser is always closed, even when scraping fails.
 *
 * @returns {Promise<Object[]>} The postings built by `makeObject`.
 */
const getData = async ()=>{
    const browser = await puppeteer.launch();
    try{
        const page = await browser.newPage();

        await page.goto('https://recruit.navercorp.com/naver/job/list/developer')

        // Keep pressing the "more" button until a click no longer changes the HTML.
        let before = null
        let content = await page.content()
        while(before !== content){
            before = await page.content()
            // Best effort: a failed click (presumably because the button is gone
            // once everything is loaded) must not abort the scrape.
            await page.click('#moreDiv > button').catch(()=>{})
            // Plain timer instead of page.waitForTimeout, which newer puppeteer
            // releases no longer provide.
            await new Promise((resolve)=> setTimeout(resolve, 200))
            content = await page.content()
        }

        const $ = cheerio.load(content, {decodeEntities: true})
        const selector = '#jobListDiv > ul > li'

        // Render the selection to an HTML string explicitly rather than relying
        // on implicit coercion of the cheerio object.
        const sanitized = sanitizeHtml($.html($(selector)), {
            parser : {
                decodeEntities: true
            }
        })

        const resArr = sanitized.split('</li><li>')
        const result = makeObject(resArr)
        console.log(result)
        return result;
    } finally {
        // Without this every call leaves a browser process running.
        await browser.close()
    }
}
71 +
// Public surface of this module: only the scraper entry point is exported.
module.exports = { getData }
...\ No newline at end of file ...\ No newline at end of file