Merge branch 'feature/LotteCinema_Crawling' into 'feature/LotteCinema_Crawling'
Add Crawling LotteCinema Theater Code
See merge request !10
Showing 1 changed file with 34 additions and 29 deletions
| ... | @@ -6,6 +6,7 @@ const { response } = require('express'); | ... | @@ -6,6 +6,7 @@ const { response } = require('express'); |
| 6 | 6 | ||
| 7 | const puppeteer = require('puppeteer'); | 7 | const puppeteer = require('puppeteer'); |
| 8 | const cheerio = require('cheerio'); | 8 | const cheerio = require('cheerio'); |
| 9 | +const { textContent } = require('domutils'); | ||
| 9 | 10 | ||
| 10 | 11 | ||
| 11 | app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | 12 | app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
| ... | @@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | ... | @@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
| 28 | if($(this).find('a > em').text() != "AD"){ | 29 | if($(this).find('a > em').text() != "AD"){ |
| 29 | data[i++]={ | 30 | data[i++]={ |
| 30 | rank : i, | 31 | rank : i, |
| 31 | - url: $(this).find('div.top_info > div > div > a').attr('href').replace(""), | 32 | + url: $(this).find('div.top_info > div > div > a').attr('href'), |
| 32 | title : $(this).find(' div.btm_info > strong').text(), | 33 | title : $(this).find(' div.btm_info > strong').text(), |
| 33 | rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), | 34 | rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), |
| 34 | star : $(this).find('div.btm_info > span > span.star_info').text(), | 35 | star : $(this).find('div.btm_info > span > span.star_info').text(), |
| ... | @@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | ... | @@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
| 41 | //console.log(data); | 42 | //console.log(data); |
| 42 | })) | 43 | })) |
| 43 | 44 | ||
| 44 | -let server = app.listen(80); | 45 | +app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => { |
| 45 | - | 46 | + |
| 46 | -// (async () => { | 47 | + const browser = await puppeteer.launch(); |
| 47 | -// const browser = await puppeteer.launch(); | ||
| 48 | - | ||
| 49 | -// const page = await browser.newPage(); | ||
| 50 | - | ||
| 51 | -// // 수집하고자 하는 URL을 입력 | ||
| 52 | -// await page.goto('https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1'); | ||
| 53 | 48 | ||
| 49 | + const page = await browser.newPage(); | ||
| 54 | 50 | ||
| 55 | -// let content = await page.content(); | 51 | + // 수집하고자 하는 URL을 입력 |
| 56 | -// let $ = cheerio.load(content, {decodeEntities: true}); | 52 | + await page.goto('https://www.lottecinema.co.kr/NLCHS/'); |
| 57 | -// let data = []; | ||
| 58 | -// const $bodyList = $("#contents > div > ul.movie_list.type2").children("li"); | ||
| 59 | 53 | ||
| 60 | -// let i =0; | 54 | + let content = await page.content(); |
| 61 | -// $bodyList.each(function(elem){ | 55 | + let $ = cheerio.load(content, {decodeEntities: true}); |
| 62 | -// if($(this).find('a > em').text() != "AD"){ | 56 | + let theaterData = []; |
| 63 | -// data[i++]={ | 57 | + const $TypeList = $("#nav > ul > li:nth-child(3) > div > ul").children("li").find("div > ul").children("li"); |
| 64 | -// rank : i, | 58 | + let i =0; |
| 65 | -// url: $(this).find('div.top_info > div > div > a').attr('href').replace(""), | 59 | + let flag = 0; |
| 66 | -// title : $(this).find(' div.btm_info > strong').text(), | 60 | + $TypeList.each(function(temp_Type){ |
| 67 | -// rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), | 61 | + if(flag==1){ |
| 68 | -// star : $(this).find('div.btm_info > span > span.star_info').text(), | 62 | + theaterData[i++]={ |
| 69 | -// }; | 63 | + LocateUrl : $(this).find('a').attr('href'), |
| 70 | -// } | 64 | + LocateName : $(this).find('a').text(), |
| 71 | -// }); | 65 | + LocateQuery : $(this).find('a').attr('href') |
| 72 | -// await browser.close(); | 66 | + .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/Detail?divisionCode=","") |
| 67 | + .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/SpecialCinema?divisionCode=","") | ||
| 68 | + .replace("&detailDivisionCode=","|").replace("&cinemaID=","|").replace("&screendivcd=","|"), | ||
| 69 | + }; | ||
| 70 | + }else{ | ||
| 71 | + flag++; | ||
| 72 | + } | ||
| 73 | + }); | ||
| 74 | + await browser.close(); | ||
| 73 | 75 | ||
| 74 | -// console.log(data); | 76 | + res.send(theaterData); |
| 75 | -// })(); | 77 | + //console.log(theaterData); |
| 78 | +})) | ||
| 79 | + | ||
| 80 | +let server = app.listen(80); | ... | ... |
-
Please register or login to post a comment