Showing
1 changed file
with
76 additions
and
0 deletions
... | @@ -2,6 +2,82 @@ const puppeteer = require('puppeteer') | ... | @@ -2,6 +2,82 @@ const puppeteer = require('puppeteer') |
2 | const cheerio = require('cheerio') | 2 | const cheerio = require('cheerio') |
3 | const sanitizeHtml =require('sanitize-html') | 3 | const sanitizeHtml =require('sanitize-html') |
4 | 4 | ||
/**
 * Flatten one job-listing HTML fragment into an array of text tokens.
 *
 * The posting URL is built by appending the first double-quoted attribute
 * value of the first `<a...>` tag to "https://programmers.co.kr/job".
 * All tags and all literal backslash-t sequences are then removed, every
 * space is deleted, the text is split on newlines and empty lines are
 * dropped. The last remaining text token is discarded and the URL is
 * appended in its place.
 *
 * NOTE(review): the first quoted attribute is presumably `href` — confirm
 * against the sanitized markup. Also confirm that base + href yields the
 * intended address.
 *
 * @param {string} string - HTML fragment of a single listing item.
 * @returns {string[]} Text tokens in document order, followed by the URL.
 * @throws {TypeError} If the fragment contains no `<a...>` tag.
 */
const splitData = (string) => {
  const anchorMatch = /<a[^>]+>/.exec(string);
  if (anchorMatch === null) {
    throw new TypeError('splitData: no <a ...> tag found in listing HTML');
  }

  // Second piece of a split on '"' is the first quoted attribute value.
  const href = anchorMatch[0].split('"')[1];
  const url = "https://programmers.co.kr/job" + href;

  const tokens = string
    .replace(/(<([^>]+)>)*(\\t)?/gi, "") // strip tags and literal "\t" text
    .replace(/ /g, "")
    .split("\n")
    .filter((line) => line !== '');

  // The trailing text token is replaced by the URL.
  tokens.pop();
  tokens.push(url);
  return tokens;
};
14 | + | ||
/**
 * Convert listing HTML fragments into posting records.
 *
 * Each fragment is tokenized with `splitData`; the record is then assembled
 * from fixed token positions: index 0 is the title, index 2 is the term,
 * indices 6 up to (but excluding) the last token are the tags, and the last
 * token is the URL.
 *
 * @param {Iterable<string>} array - HTML fragments, one per listing item.
 * @returns {{title: string, term: string, tags: string[], url: string}[]}
 *   One record per fragment, in input order.
 */
const makeObject = (array) => {
  const postings = [];
  for (const fragment of array) {
    const fields = splitData(fragment);
    postings.push({
      title: fields[0],
      term: fields[2],
      // Everything from index 6 onward except the trailing URL token.
      tags: fields.slice(6, -1),
      url: fields[fields.length - 1],
    });
  }
  return postings;
};
32 | + | ||
/**
 * Click the pagination "next" link, pause briefly, and return the page HTML.
 *
 * @param {object} page - Object exposing async `click(selector)` and
 *   `content()` methods (a Puppeteer page in this file).
 * @param {number} [delayMs=300] - Milliseconds to wait after the click before
 *   reading the content.
 * @returns {Promise<string>} The value resolved by `page.content()`.
 */
const moveNextPage = async (page, delayMs = 300) => {
  try {
    await page.click('#paginate > nav > ul > li.next.next_page.page-item > a');
  } catch (error) {
    // Deliberately ignored: when the click fails the content is returned
    // unchanged, which is what the caller's loop uses to detect the end.
  }

  // Plain timer instead of page.waitForTimeout, which newer Puppeteer
  // releases no longer provide.
  await new Promise((resolve) => setTimeout(resolve, delayMs));
  return page.content();
};
40 | + | ||
/**
 * Scrape every page of https://programmers.co.kr/job into posting records.
 *
 * Launches a browser, loads the job list, and for each page reads up to 20
 * `<li>` children of `#list-positions-wrapper > ul`, sanitizes each one and
 * converts the batch with `makeObject`. It then asks `moveNextPage` for the
 * next page and stops once the returned HTML is identical to the previous
 * page's HTML. The collected records are logged and returned.
 *
 * @returns {Promise<object[]>} All records produced by `makeObject`, in page
 *   order.
 */
const getData = async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    let result = [];
    let previousContent = "";

    await page.goto('https://programmers.co.kr/job');
    let content = await page.content();

    // Unchanged HTML after a "next" attempt means there is no further page.
    while (previousContent !== content) {
      const $ = cheerio.load(content, {decodeEntities: true});

      const resArr = [];
      for (let i = 1; i <= 20; i++) {
        const selector = `#list-positions-wrapper > ul > li:nth-child(${i})`;
        const item = sanitizeHtml($(selector), {
          parser : {
            decodeEntities: true
          }
        });
        // An empty result means this page has fewer than i items.
        if (item === '') break;
        // NOTE(review): the delimiter ends with a backtick, so this split
        // presumably never matches and the whole item is kept — confirm
        // before changing, since makeObject's token indices depend on it.
        resArr.push(item.split("</div>`")[0]);
      }

      result = result.concat(makeObject(resArr));
      previousContent = content;
      content = await moveNextPage(page);
    }

    console.log("finish", result.length);
    console.log(result);
    return result;
  } finally {
    // Always release the browser process, even when scraping throws.
    await browser.close();
  }
};
79 | + | ||
// Run the scraper once at module load; log a failure instead of leaving the
// returned promise's rejection unhandled.
getData().catch((error) => {
  console.error("getData failed:", error);
});
5 | 81 | ||
6 | module.exports = { | 82 | module.exports = { |
7 | 83 | ... | ... |
-
Please register or login to post a comment