Merge branch 'feature/LotteCinema_Crawling' into 'feature/LotteCinema_Crawling'
Feature/lotte cinema crawling: add retrieval of currently playing movie information. See merge request !14.
Showing 3 changed files with 99 additions and 21 deletions
| ... | @@ -8,9 +8,22 @@ const puppeteer = require('puppeteer'); | ... | @@ -8,9 +8,22 @@ const puppeteer = require('puppeteer'); |
| 8 | const cheerio = require('cheerio'); | 8 | const cheerio = require('cheerio'); |
| 9 | const { textContent } = require('domutils'); | 9 | const { textContent } = require('domutils'); |
| 10 | 10 | ||
| 11 | +//https://www.lottecinema.co.kr/NLCHS/Ticketing?movieCd=18632&movieName=범죄도시%202&screenCd=1|1|1009&screenName=김포공항&releaseDate=2022-05-18 | ||
| 12 | +let movieData = []; | ||
| 13 | +let theaterData = []; | ||
| 11 | 14 | ||
| 12 | -app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | 15 | +function getToday(){ |
| 13 | - | 16 | + var date = new Date(); |
| 17 | + var year = date.getFullYear(); | ||
| 18 | + var month = ("0" + (1 + date.getMonth())).slice(-2); | ||
| 19 | + var day = ("0" + date.getDate()).slice(-2); | ||
| 20 | + | ||
| 21 | + return year + "-" + month + "-" + day; | ||
| 22 | +} | ||
| 23 | +//console.log(getToday()); | ||
| 24 | + | ||
| 25 | + | ||
| 26 | +(async () => { | ||
| 14 | const browser = await puppeteer.launch(); | 27 | const browser = await puppeteer.launch(); |
| 15 | 28 | ||
| 16 | const page = await browser.newPage(); | 29 | const page = await browser.newPage(); |
| ... | @@ -21,13 +34,13 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | ... | @@ -21,13 +34,13 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
| 21 | 34 | ||
| 22 | let content = await page.content(); | 35 | let content = await page.content(); |
| 23 | let $ = cheerio.load(content, {decodeEntities: true}); | 36 | let $ = cheerio.load(content, {decodeEntities: true}); |
| 24 | - let data = []; | 37 | + |
| 25 | const $bodyList = $("#contents > div > ul.movie_list.type2").children("li"); | 38 | const $bodyList = $("#contents > div > ul.movie_list.type2").children("li"); |
| 26 | 39 | ||
| 27 | let i =0; | 40 | let i =0; |
| 28 | - $bodyList.each(function(elem){ | 41 | + $bodyList.each(function(temp_Body){ |
| 29 | if($(this).find('a > em').text() != "AD"){ | 42 | if($(this).find('a > em').text() != "AD"){ |
| 30 | - data[i++]={ | 43 | + movieData[i++]={ |
| 31 | rank : i, | 44 | rank : i, |
| 32 | url: $(this).find('div.top_info > div > div > a').attr('href'), | 45 | url: $(this).find('div.top_info > div > div > a').attr('href'), |
| 33 | title : $(this).find(' div.btm_info > strong').text(), | 46 | title : $(this).find(' div.btm_info > strong').text(), |
| ... | @@ -36,26 +49,14 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | ... | @@ -36,26 +49,14 @@ app.get('/LotteCinema', asyncHandler(async (req, res, next) => { |
| 36 | }; | 49 | }; |
| 37 | } | 50 | } |
| 38 | }); | 51 | }); |
| 39 | - await browser.close(); | ||
| 40 | - | ||
| 41 | - res.send(data); | ||
| 42 | - //console.log(data); | ||
| 43 | -})) | ||
| 44 | 52 | ||
| 45 | -app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => { | ||
| 46 | - | ||
| 47 | - const browser = await puppeteer.launch(); | ||
| 48 | - | ||
| 49 | - const page = await browser.newPage(); | ||
| 50 | - | ||
| 51 | - // 수집하고자 하는 URL을 입력 | ||
| 52 | await page.goto('https://www.lottecinema.co.kr/NLCHS/'); | 53 | await page.goto('https://www.lottecinema.co.kr/NLCHS/'); |
| 53 | 54 | ||
| 54 | - let content = await page.content(); | 55 | + content = await page.content(); |
| 55 | - let $ = cheerio.load(content, {decodeEntities: true}); | 56 | + $ = cheerio.load(content, {decodeEntities: true}); |
| 56 | - let theaterData = []; | 57 | + |
| 57 | const $TypeList = $("#nav > ul > li:nth-child(3) > div > ul").children("li").find("div > ul").children("li"); | 58 | const $TypeList = $("#nav > ul > li:nth-child(3) > div > ul").children("li").find("div > ul").children("li"); |
| 58 | - let i =0; | 59 | + i =0; |
| 59 | let flag = 0; | 60 | let flag = 0; |
| 60 | $TypeList.each(function(temp_Type){ | 61 | $TypeList.each(function(temp_Type){ |
| 61 | if(flag==1){ | 62 | if(flag==1){ |
| ... | @@ -71,10 +72,72 @@ app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => { | ... | @@ -71,10 +72,72 @@ app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => { |
| 71 | flag++; | 72 | flag++; |
| 72 | } | 73 | } |
| 73 | }); | 74 | }); |
| 75 | + | ||
| 74 | await browser.close(); | 76 | await browser.close(); |
| 77 | + console.log("Completed!"); | ||
| 78 | +})(); | ||
| 79 | + | ||
| 80 | + | ||
| 81 | +app.get('/LotteCinema', asyncHandler(async (req, res, next) => { | ||
| 82 | + | ||
| 83 | + res.send(movieData); | ||
| 84 | + //console.log(movieData); | ||
| 85 | +})) | ||
| 86 | + | ||
| 87 | +app.get('/LotteCinema/theater', asyncHandler(async (req, res, next) => { | ||
| 75 | 88 | ||
| 76 | res.send(theaterData); | 89 | res.send(theaterData); |
| 77 | //console.log(theaterData); | 90 | //console.log(theaterData); |
| 78 | })) | 91 | })) |
| 79 | 92 | ||
| 93 | +app.get('/LotteCinema/GetPlayingMovie', asyncHandler(async (req, res, next) => { | ||
| 94 | + | ||
| 95 | + const browser1 = await puppeteer.launch(); | ||
| 96 | + const page1 = await browser1.newPage(); | ||
| 97 | + | ||
| 98 | + testTheaterName = "판교"; | ||
| 99 | + testDate = "2022-05-30" | ||
| 100 | + let PlayingMovieURL; | ||
| 101 | + let playingMovieData = []; | ||
| 102 | + | ||
| 103 | + theaterData.forEach((val, index)=>{ | ||
| 104 | + // console.log(val); | ||
| 105 | + if(val.LocateName.includes(testTheaterName)){ | ||
| 106 | + PlayingMovieURL = movieData[0].url+ "&screenCd="+ val.LocateQuery + | ||
| 107 | + "&screenName=" + val.LocateName + | ||
| 108 | + "&releaseDate=" + testDate; | ||
| 109 | + } | ||
| 110 | + | ||
| 111 | + }) | ||
| 112 | + | ||
| 113 | + //console.log(!PlayingMovieURL); | ||
| 114 | + | ||
| 115 | + if(PlayingMovieURL){ | ||
| 116 | + await page1.goto(PlayingMovieURL); | ||
| 117 | + | ||
| 118 | + content = await page1.content(); | ||
| 119 | + $ = cheerio.load(content, {decodeEntities: true}); | ||
| 120 | + | ||
| 121 | + let i = 0; | ||
| 122 | + const $AbleList = $("#mCSB_9_container > ul").children("li"); | ||
| 123 | + | ||
| 124 | + $AbleList.each(function(temp_Able){ | ||
| 125 | + if($(this).attr("class") != "disabled"){ | ||
| 126 | + playingMovieData[i++]={ | ||
| 127 | + rank : i, | ||
| 128 | + title : $(this).find("a > div.group_infor > div > strong").text(), | ||
| 129 | + } | ||
| 130 | + } | ||
| 131 | + }); | ||
| 132 | + }else{ | ||
| 133 | + res.send("Please wait until get Movie and Theater information!"); | ||
| 134 | + console.log("Please wait until get Movie and Theater information!"); | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | + await browser1.close(); | ||
| 138 | + | ||
| 139 | + res.send(playingMovieData); | ||
| 140 | + //console.log(theaterData); | ||
| 141 | +})) | ||
| 142 | + | ||
| 80 | let server = app.listen(80); | 143 | let server = app.listen(80); | ... | ... |
package-lock.json
0 → 100644
This diff could not be displayed because it is too large.
package.json
0 → 100644
| 1 | +{ | ||
| 2 | + "dependencies": { | ||
| 3 | + "axios": "^0.27.2", | ||
| 4 | + "body-parser": "^1.20.0", | ||
| 5 | + "cheerio": "^1.0.0-rc.10", | ||
| 6 | + "dom-parser": "^0.1.6", | ||
| 7 | + "ejs": "^3.1.7", | ||
| 8 | + "express": "^4.17.3", | ||
| 9 | + "express-async-handler": "^1.2.0", | ||
| 10 | + "express-session": "^1.17.2", | ||
| 11 | + "puppeteer": "^14.1.0", | ||
| 12 | + "request": "^2.88.2", | ||
| 13 | + "sanitize-html": "^2.7.0" | ||
| 14 | + } | ||
| 15 | +} |
-
Please register or login to post a comment