김대선

프로그래머스 데이터 가져오는 부분 오류 수정

...@@ -8,7 +8,7 @@ const app = express(); ...@@ -8,7 +8,7 @@ const app = express();
8 8
9 9
10 // 0초 0분 0시 아무날 아무달 아무요일 10 // 0초 0분 0시 아무날 아무달 아무요일
11 -const saveData = schedule.scheduleJob('55 30 16 * * *', dataFunctions.save) 11 +const saveData = schedule.scheduleJob('55 45 20 * * *', dataFunctions.save)
12 12
13 const server = app.listen(3000,()=>{ 13 const server = app.listen(3000,()=>{
14 const host = server.address().address 14 const host = server.address().address
......
...@@ -78,12 +78,14 @@ const save = async()=> { ...@@ -78,12 +78,14 @@ const save = async()=> {
78 const today = new Date() 78 const today = new Date()
79 fs.writeFile(`./datas/${today.getFullYear()}.${today.getMonth()}.${today.getDate()}`, string, 'utf-8', (err)=>{ 79 fs.writeFile(`./datas/${today.getFullYear()}.${today.getMonth()}.${today.getDate()}`, string, 'utf-8', (err)=>{
80 if(err){ 80 if(err){
81 + console.log("파일저장시에 오류")
81 console.log(err) 82 console.log(err)
82 save() 83 save()
83 } 84 }
84 else console.log("저장완료!") 85 else console.log("저장완료!")
85 }) 86 })
86 } catch (error) { 87 } catch (error) {
88 + console.log("데이터 가져오는 과정에서 오류")
87 console.log(error) 89 console.log(error)
88 save() 90 save()
89 } 91 }
......
...@@ -4,12 +4,18 @@ const sanitizeHtml =require('sanitize-html') ...@@ -4,12 +4,18 @@ const sanitizeHtml =require('sanitize-html')
4 4
5 const splitData = (string)=>{ 5 const splitData = (string)=>{
6 const temp = /(<a([^>]+)>)/g.exec(string) 6 const temp = /(<a([^>]+)>)/g.exec(string)
7 - const temp2 = temp[0].split('"'); 7 + try {
8 - url = "https://programmers.co.kr/job" + temp2[1]; 8 + const temp2 = temp[0].split('"');
9 - result = string.replace(/(<([^>]+)>)*(\\t)?/gi, "").replace(/ /g, "").split("\n").filter((ele)=> ele != '') 9 + url = "https://programmers.co.kr/job" + temp2[1];
10 - result.pop() 10 + result = string.replace(/(<([^>]+)>)*(\\t)?/gi, "").replace(/ /g, "").split("\n").filter((ele)=> ele != '')
11 - result.push(url) 11 + result.pop()
12 - return result; 12 + result.push(url)
13 + return result;
14 + } catch (error) {
15 + console.log(temp)
16 + console.log(string)
17 + }
18 +
13 } 19 }
14 20
15 const makeObject = (array)=>{ 21 const makeObject = (array)=>{
...@@ -17,6 +23,10 @@ const makeObject = (array)=>{ ...@@ -17,6 +23,10 @@ const makeObject = (array)=>{
17 let tempData = null 23 let tempData = null
18 for(let i of array){ 24 for(let i of array){
19 tempData = splitData(i) 25 tempData = splitData(i)
26 + if(!tempData){
27 + console.log("error")
28 + continue
29 + }
20 result.push({ 30 result.push({
21 title : tempData[0], 31 title : tempData[0],
22 tags : [], 32 tags : [],
...@@ -30,29 +40,31 @@ const makeObject = (array)=>{ ...@@ -30,29 +40,31 @@ const makeObject = (array)=>{
30 return result 40 return result
31 } 41 }
32 42
33 -const moveNextPage = async (page)=>{
34 -
35 - await page.click('#paginate > nav > ul > li.next.next_page.page-item > a').catch((error)=>{
36 - })
37 - await page.waitForTimeout(300)
38 - return await page.content()
39 -}
40 -
41 const getData = async ()=>{ 43 const getData = async ()=>{
42 const browser = await puppeteer.launch(); 44 const browser = await puppeteer.launch();
43 const page = await browser.newPage(); 45 const page = await browser.newPage();
44 46
45 let result = [] 47 let result = []
46 let temp = "" 48 let temp = ""
47 - 49 + let count = 1;
48 await page.goto('https://programmers.co.kr/job') 50 await page.goto('https://programmers.co.kr/job')
51 + let selector = '#paginate > nav > ul > li:nth-child(8) > a'
49 let content = await page.content() 52 let content = await page.content()
53 + let $ = cheerio.load(content, {decodeEntities: true})
54 + const final = sanitizeHtml($(selector), {
55 + parser : {
56 + decodeEntities : true
57 + }
58 + }).replace(/(<([^>]+)>)*(\\t)?/gi, "")
59 + console.log(final)
50 while(true){ 60 while(true){
51 - if(temp == content){ 61 + await page.goto(`https://programmers.co.kr/job?page=${count}`)
62 + content = await page.content()
63 + if(final < count){
52 console.log("finish", result.length) 64 console.log("finish", result.length)
53 break; 65 break;
54 } 66 }
55 - let $ = cheerio.load(content, {decodeEntities: true}) 67 + $ = cheerio.load(content, {decodeEntities: true})
56 68
57 let item = "" 69 let item = ""
58 let resArr =[] 70 let resArr =[]
...@@ -63,22 +75,21 @@ const getData = async ()=>{ ...@@ -63,22 +75,21 @@ const getData = async ()=>{
63 decodeEntities: true 75 decodeEntities: true
64 } 76 }
65 }) 77 })
66 - if(item =='') break; 78 + if(item ==''){
79 + console.log("break!!!!")
80 + break;
81 + }
67 item = item.split("</div>`") 82 item = item.split("</div>`")
68 resArr.push(item[0]) 83 resArr.push(item[0])
69 84
70 } 85 }
71 result = result.concat(await makeObject(resArr)) 86 result = result.concat(await makeObject(resArr))
87 + count = count + 1
72 resArr = [] 88 resArr = []
73 - temp = content
74 - content = await moveNextPage(page)
75 } 89 }
76 return result 90 return result
77 } 91 }
78 92
79 -
80 module.exports = { 93 module.exports = {
81 getData : getData 94 getData : getData
82 } 95 }
83 -
84 -getData()
...\ No newline at end of file ...\ No newline at end of file
......