Showing
3 changed files
with
31 additions
and
18 deletions
... | @@ -8,7 +8,7 @@ const app = express(); | ... | @@ -8,7 +8,7 @@ const app = express(); |
8 | 8 | ||
9 | 9 | ||
10 | // 0초 0분 0시 아무날 아무달 아무년 | 10 | // 0초 0분 0시 아무날 아무달 아무년 |
11 | -const saveData = schedule.scheduleJob('55 30 16 * * *', dataFunctions.save) | 11 | +const saveData = schedule.scheduleJob('55 45 20 * * *', dataFunctions.save) |
12 | 12 | ||
13 | const server = app.listen(3000,()=>{ | 13 | const server = app.listen(3000,()=>{ |
14 | const host = server.address().address | 14 | const host = server.address().address | ... | ... |
... | @@ -78,12 +78,14 @@ const save = async()=> { | ... | @@ -78,12 +78,14 @@ const save = async()=> { |
78 | const today = new Date() | 78 | const today = new Date() |
79 | fs.writeFile(`./datas/${today.getFullYear()}.${today.getMonth()}.${today.getDate()}`, string, 'utf-8', (err)=>{ | 79 | fs.writeFile(`./datas/${today.getFullYear()}.${today.getMonth()}.${today.getDate()}`, string, 'utf-8', (err)=>{ |
80 | if(err){ | 80 | if(err){ |
81 | + console.log("파일저장시에 오류") | ||
81 | console.log(err) | 82 | console.log(err) |
82 | save() | 83 | save() |
83 | } | 84 | } |
84 | else console.log("저장완료!") | 85 | else console.log("저장완료!") |
85 | }) | 86 | }) |
86 | } catch (error) { | 87 | } catch (error) { |
88 | + console.log("데이터 가져오는 과정에서 오류") | ||
87 | console.log(error) | 89 | console.log(error) |
88 | save() | 90 | save() |
89 | } | 91 | } | ... | ... |
... | @@ -4,12 +4,18 @@ const sanitizeHtml =require('sanitize-html') | ... | @@ -4,12 +4,18 @@ const sanitizeHtml =require('sanitize-html') |
4 | 4 | ||
5 | const splitData = (string)=>{ | 5 | const splitData = (string)=>{ |
6 | const temp = /(<a([^>]+)>)/g.exec(string) | 6 | const temp = /(<a([^>]+)>)/g.exec(string) |
7 | + try { | ||
7 | const temp2 = temp[0].split('"'); | 8 | const temp2 = temp[0].split('"'); |
8 | url = "https://programmers.co.kr/job" + temp2[1]; | 9 | url = "https://programmers.co.kr/job" + temp2[1]; |
9 | result = string.replace(/(<([^>]+)>)*(\\t)?/gi, "").replace(/ /g, "").split("\n").filter((ele)=> ele != '') | 10 | result = string.replace(/(<([^>]+)>)*(\\t)?/gi, "").replace(/ /g, "").split("\n").filter((ele)=> ele != '') |
10 | result.pop() | 11 | result.pop() |
11 | result.push(url) | 12 | result.push(url) |
12 | return result; | 13 | return result; |
14 | + } catch (error) { | ||
15 | + console.log(temp) | ||
16 | + console.log(string) | ||
17 | + } | ||
18 | + | ||
13 | } | 19 | } |
14 | 20 | ||
15 | const makeObject = (array)=>{ | 21 | const makeObject = (array)=>{ |
... | @@ -17,6 +23,10 @@ const makeObject = (array)=>{ | ... | @@ -17,6 +23,10 @@ const makeObject = (array)=>{ |
17 | let tempData = null | 23 | let tempData = null |
18 | for(let i of array){ | 24 | for(let i of array){ |
19 | tempData = splitData(i) | 25 | tempData = splitData(i) |
26 | + if(!tempData){ | ||
27 | + console.log("error") | ||
28 | + continue | ||
29 | + } | ||
20 | result.push({ | 30 | result.push({ |
21 | title : tempData[0], | 31 | title : tempData[0], |
22 | tags : [], | 32 | tags : [], |
... | @@ -30,29 +40,31 @@ const makeObject = (array)=>{ | ... | @@ -30,29 +40,31 @@ const makeObject = (array)=>{ |
30 | return result | 40 | return result |
31 | } | 41 | } |
32 | 42 | ||
33 | -const moveNextPage = async (page)=>{ | ||
34 | - | ||
35 | - await page.click('#paginate > nav > ul > li.next.next_page.page-item > a').catch((error)=>{ | ||
36 | - }) | ||
37 | - await page.waitForTimeout(300) | ||
38 | - return await page.content() | ||
39 | -} | ||
40 | - | ||
41 | const getData = async ()=>{ | 43 | const getData = async ()=>{ |
42 | const browser = await puppeteer.launch(); | 44 | const browser = await puppeteer.launch(); |
43 | const page = await browser.newPage(); | 45 | const page = await browser.newPage(); |
44 | 46 | ||
45 | let result = [] | 47 | let result = [] |
46 | let temp = "" | 48 | let temp = "" |
47 | - | 49 | + let count = 1; |
48 | await page.goto('https://programmers.co.kr/job') | 50 | await page.goto('https://programmers.co.kr/job') |
51 | + let selector = '#paginate > nav > ul > li:nth-child(8) > a' | ||
49 | let content = await page.content() | 52 | let content = await page.content() |
53 | + let $ = cheerio.load(content, {decodeEntities: true}) | ||
54 | + const final = sanitizeHtml($(selector), { | ||
55 | + parser : { | ||
56 | + decodeEntities : true | ||
57 | + } | ||
58 | + }).replace(/(<([^>]+)>)*(\\t)?/gi, "") | ||
59 | + console.log(final) | ||
50 | while(true){ | 60 | while(true){ |
51 | - if(temp == content){ | 61 | + await page.goto(`https://programmers.co.kr/job?page=${count}`) |
62 | + content = await page.content() | ||
63 | + if(final < count){ | ||
52 | console.log("finish", result.length) | 64 | console.log("finish", result.length) |
53 | break; | 65 | break; |
54 | } | 66 | } |
55 | - let $ = cheerio.load(content, {decodeEntities: true}) | 67 | + $ = cheerio.load(content, {decodeEntities: true}) |
56 | 68 | ||
57 | let item = "" | 69 | let item = "" |
58 | let resArr =[] | 70 | let resArr =[] |
... | @@ -63,22 +75,21 @@ const getData = async ()=>{ | ... | @@ -63,22 +75,21 @@ const getData = async ()=>{ |
63 | decodeEntities: true | 75 | decodeEntities: true |
64 | } | 76 | } |
65 | }) | 77 | }) |
66 | - if(item =='') break; | 78 | + if(item ==''){ |
79 | + console.log("break!!!!") | ||
80 | + break; | ||
81 | + } | ||
67 | item = item.split("</div>`") | 82 | item = item.split("</div>`") |
68 | resArr.push(item[0]) | 83 | resArr.push(item[0]) |
69 | 84 | ||
70 | } | 85 | } |
71 | result = result.concat(await makeObject(resArr)) | 86 | result = result.concat(await makeObject(resArr)) |
87 | + count = count + 1 | ||
72 | resArr = [] | 88 | resArr = [] |
73 | - temp = content | ||
74 | - content = await moveNextPage(page) | ||
75 | } | 89 | } |
76 | return result | 90 | return result |
77 | } | 91 | } |
78 | 92 | ||
79 | - | ||
80 | module.exports = { | 93 | module.exports = { |
81 | getData : getData | 94 | getData : getData |
82 | } | 95 | } |
83 | - | ||
84 | -getData() | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment