임승현

Merge branch 'feature/LotteCinema_Crawling' into 'feature/LotteCinema_Crawling'

Add LotteCinema theater code crawling



See merge request !10
...@@ -6,6 +6,7 @@ const { response } = require('express'); ...@@ -6,6 +6,7 @@ const { response } = require('express');
6 6
7 const puppeteer = require('puppeteer'); 7 const puppeteer = require('puppeteer');
8 const cheerio = require('cheerio'); 8 const cheerio = require('cheerio');
9 +const { textContent } = require('domutils');
9 10
10 11
11 app.get('/LotteCinema', asyncHandler(async (req, res, next) => { 12 app.get('/LotteCinema', asyncHandler(async (req, res, next) => {
...@@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { ...@@ -28,7 +29,7 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => {
28 if($(this).find('a > em').text() != "AD"){ 29 if($(this).find('a > em').text() != "AD"){
29 data[i++]={ 30 data[i++]={
30 rank : i, 31 rank : i,
31 - url: $(this).find('div.top_info > div > div > a').attr('href').replace(""), 32 + url: $(this).find('div.top_info > div > div > a').attr('href'),
32 title : $(this).find(' div.btm_info > strong').text(), 33 title : $(this).find(' div.btm_info > strong').text(),
33 rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), 34 rate : $(this).find('div.btm_info > span > span.rate_info > em').text(),
34 star : $(this).find('div.btm_info > span > span.star_info').text(), 35 star : $(this).find('div.btm_info > span > span.star_info').text(),
...@@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { ...@@ -41,35 +42,39 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => {
41 //console.log(data); 42 //console.log(data);
42 })) 43 }))
43 44
44 -let server = app.listen(80); 45 +app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => {
45 - 46 +
46 -// (async () => { 47 + const browser = await puppeteer.launch();
47 -// const browser = await puppeteer.launch();
48 -
49 -// const page = await browser.newPage();
50 -
51 -// // 수집하고자 하는 URL을 입력
52 -// await page.goto('https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1');
53 48
49 + const page = await browser.newPage();
54 50
55 -// let content = await page.content(); 51 + // 수집하고자 하는 URL을 입력
56 -// let $ = cheerio.load(content, {decodeEntities: true}); 52 + await page.goto('https://www.lottecinema.co.kr/NLCHS/');
57 -// let data = [];
58 -// const $bodyList = $("#contents > div > ul.movie_list.type2").children("li");
59 53
60 -// let i =0; 54 + let content = await page.content();
61 -// $bodyList.each(function(elem){ 55 + let $ = cheerio.load(content, {decodeEntities: true});
62 -// if($(this).find('a > em').text() != "AD"){ 56 + let theaterData = [];
63 -// data[i++]={ 57 + const $TypeList = $("#nav > ul > li:nth-child(3) > div > ul").children("li").find("div > ul").children("li");
64 -// rank : i, 58 + let i =0;
65 -// url: $(this).find('div.top_info > div > div > a').attr('href').replace(""), 59 + let flag = 0;
66 -// title : $(this).find(' div.btm_info > strong').text(), 60 + $TypeList.each(function(temp_Type){
67 -// rate : $(this).find('div.btm_info > span > span.rate_info > em').text(), 61 + if(flag==1){
68 -// star : $(this).find('div.btm_info > span > span.star_info').text(), 62 + theaterData[i++]={
69 -// }; 63 + LocateUrl : $(this).find('a').attr('href'),
70 -// } 64 + LocateName : $(this).find('a').text(),
71 -// }); 65 + LocateQuery : $(this).find('a').attr('href')
72 -// await browser.close(); 66 + .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/Detail?divisionCode=","")
67 + .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/SpecialCinema?divisionCode=","")
68 + .replace("&detailDivisionCode=","|").replace("&cinemaID=","|").replace("&screendivcd=","|"),
69 + };
70 + }else{
71 + flag++;
72 + }
73 + });
74 + await browser.close();
73 75
74 -// console.log(data); 76 + res.send(theaterData);
75 -// })(); 77 + //console.log(theaterData);
78 +}))
79 +
80 +let server = app.listen(80);
......