김대선

프로그래머스 정보를 가져와서 분류 완료

...@@ -2,6 +2,82 @@ const puppeteer = require('puppeteer') ...@@ -2,6 +2,82 @@ const puppeteer = require('puppeteer')
2 const cheerio = require('cheerio') 2 const cheerio = require('cheerio')
3 const sanitizeHtml =require('sanitize-html') 3 const sanitizeHtml =require('sanitize-html')
4 4
/**
 * Flattens the sanitized HTML of one job-list item into an array of text
 * tokens and appends the posting URL as the last element.
 *
 * @param {string} string - HTML markup of a single list item; it must contain
 *   an `<a ...>` tag whose first double-quoted attribute value is the link path.
 * @returns {string[]} Non-empty text lines of the item (tags and spaces
 *   removed, final text line dropped) followed by the absolute posting URL.
 * @throws {TypeError} If the markup contains no `<a ...>` tag.
 */
const splitData = (string) => {
  const anchor = /<a[^>]+>/.exec(string);
  if (anchor === null) {
    throw new TypeError('splitData: no <a ...> tag found in item markup');
  }

  // The first double-quoted value inside the anchor tag is used as the path.
  const [, href] = anchor[0].split('"');
  // NOTE(review): the path is appended to ".../job" verbatim; if href already
  // begins with its own "/job..." segment the result is a doubled path - confirm.
  const url = `https://programmers.co.kr/job${href}`;

  // `\\t` in this pattern matches a literal backslash followed by "t",
  // not a tab character. Kept as-is to preserve the existing token layout.
  const result = string
    .replace(/(<([^>]+)>)*(\\t)?/gi, "")
    .replace(/ /g, "")
    .split("\n")
    .filter((ele) => ele !== '');

  // The last text line is discarded and the URL takes its place at the end.
  result.pop();
  result.push(url);
  return result;
};
14 +
/**
 * Converts raw job-item markup strings into structured posting records.
 *
 * Each item is tokenized with `splitData`; fixed token positions are then
 * mapped onto the record fields.
 *
 * @param {string[]} array - Markup strings, one per job-list item.
 * @returns {{title: string, term: string, tags: string[], url: string}[]}
 *   One record per input item, in input order.
 */
const makeObject = (array) =>
  array.map((rawItem) => {
    const fields = splitData(rawItem);
    return {
      title: fields[0],
      term: fields[2],
      // Tokens from index 6 up to (but excluding) the trailing URL are tags.
      tags: fields.slice(6, -1),
      url: fields[fields.length - 1],
    };
  });
32 +
/**
 * Clicks the pagination "next" link and returns the page HTML afterwards.
 *
 * @param {object} page - Page handle exposing `click`, `waitForTimeout`
 *   and `content` (a Puppeteer page in this file).
 * @returns {Promise<string>} The page markup after the click attempt.
 */
const moveNextPage = async (page) => {
  const nextLinkSelector = '#paginate > nav > ul > li.next.next_page.page-item > a';

  try {
    await page.click(nextLinkSelector);
  } catch {
    // Click failures are deliberately ignored; the caller then receives the
    // current (unchanged) content. Presumably this happens on the last page,
    // where no "next" link exists - confirm.
  }

  // Fixed 300 ms pause before the markup is read.
  await page.waitForTimeout(300);
  return page.content();
};
40 +
/**
 * Scrapes the Programmers job board page by page and returns every posting
 * as a structured record.
 *
 * Starting at https://programmers.co.kr/job, up to 20 list items are read
 * from each page, converted with `makeObject`, and the next page is requested
 * with `moveNextPage`. Scraping stops when two consecutive page snapshots are
 * identical.
 *
 * @returns {Promise<object[]>} All records collected across the visited pages.
 */
const getData = async () => {
  const browser = await puppeteer.launch();

  // try/finally guarantees the browser is closed even if scraping fails, so
  // no browser process is left running.
  try {
    const page = await browser.newPage();

    let result = [];
    let previousContent = "";

    await page.goto('https://programmers.co.kr/job');
    let content = await page.content();

    // Identical consecutive snapshots mean the page did not advance.
    while (previousContent !== content) {
      const $ = cheerio.load(content, { decodeEntities: true });
      const resArr = [];

      for (let i = 1; i <= 20; i++) {
        const selector = `#list-positions-wrapper > ul > li:nth-child(${i})`;
        const item = sanitizeHtml($(selector), {
          parser: {
            decodeEntities: true,
          },
        });
        // An empty result means the page has fewer than `i` items.
        if (item === '') break;
        // NOTE(review): the separator contains a trailing backtick, so it
        // presumably never matches and the whole item is kept. Left unchanged
        // because the downstream token positions depend on it - confirm.
        resArr.push(item.split("</div>`")[0]);
      }

      result = result.concat(makeObject(resArr));
      previousContent = content;
      content = await moveNextPage(page);
    }

    console.log("finish", result.length);
    console.log(result);
    return result;
  } finally {
    await browser.close();
  }
};
79 +
// Runs the scraper as soon as this module is loaded. A failure is reported
// explicitly instead of being left as an unhandled promise rejection.
getData().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
5 81
6 module.exports = { 82 module.exports = {
7 83
......