Merge branch 'feature/LotteCinema_Crawling' into 'master'
Feature/lotte cinema crawling: first merge from feature/LotteCinema_Crawling into master. See merge request !17
Showing
4 changed files
with
176 additions
and
0 deletions
LotteCinema/app.js
0 → 100644
1 | +let express = require('express'); | ||
2 | +let app = express(); | ||
3 | +let request = require('request'); | ||
4 | +const asyncHandler = require('express-async-handler') | ||
5 | +const { response } = require('express'); | ||
6 | + | ||
7 | +const puppeteer = require('puppeteer'); | ||
8 | +const cheerio = require('cheerio'); | ||
9 | +const { textContent } = require('domutils'); | ||
10 | + | ||
11 | +//https://www.lottecinema.co.kr/NLCHS/Ticketing?movieCd=18632&movieName=범죄도시%202&screenCd=1|1|1009&screenName=김포공항&releaseDate=2022-05-18 | ||
12 | +let movieData = []; | ||
13 | +let theaterData = []; | ||
14 | + | ||
/**
 * Format a date as "YYYY-MM-DD" using the local time zone.
 *
 * @param {Date} [date=new Date()] - Date to format; defaults to the current
 *   date so existing zero-argument callers keep their behavior.
 * @returns {string} The date with a zero-padded two-digit month and day.
 */
function getToday(date = new Date()){
    const year = date.getFullYear();
    // getMonth() is zero-based, so add 1 before padding.
    const month = String(date.getMonth() + 1).padStart(2, "0");
    const day = String(date.getDate()).padStart(2, "0");

    return `${year}-${month}-${day}`;
}
23 | +//console.log(getToday()); | ||
24 | + | ||
25 | + | ||
/**
 * Startup crawl, run once when the module is loaded.
 *
 * 1. Loads the movie list page and fills the module-level `movieData` array
 *    with { rank, url, title, rate, star } for every list item that is not
 *    marked "AD".
 * 2. Loads the site's main page and fills the module-level `theaterData`
 *    array with { LocateUrl, LocateName, LocateQuery } for every entry of the
 *    navigation sub-menu except the first one.
 *
 * The browser is always closed, even when navigation or parsing throws, and
 * any failure is logged instead of being left as an unhandled rejection.
 */
(async () => {
    const browser = await puppeteer.launch();

    try {
        const page = await browser.newPage();

        // Enter the URL to collect from.
        await page.goto('https://www.lottecinema.co.kr/NLCHS/Movie/List?flag=1');

        let content = await page.content();
        let $ = cheerio.load(content, {decodeEntities: true});

        const $bodyList = $("#contents > div > ul.movie_list.type2").children("li");

        let i = 0;
        $bodyList.each(function(){
            // Items whose `a > em` text is "AD" are skipped and get no rank.
            if($(this).find('a > em').text() !== "AD"){
                movieData[i] = {
                    // rank is 1-based: first kept item is stored at index 0 with rank 1.
                    rank : i + 1,
                    url: $(this).find('div.top_info > div > div > a').attr('href'),
                    title : $(this).find(' div.btm_info > strong').text(),
                    rate : $(this).find('div.btm_info > span > span.rate_info > em').text(),
                    star : $(this).find('div.btm_info > span > span.star_info').text(),
                };
                i += 1;
            }
        });

        await page.goto('https://www.lottecinema.co.kr/NLCHS/');

        content = await page.content();
        $ = cheerio.load(content, {decodeEntities: true});

        const $TypeList = $("#nav > ul > li:nth-child(3) > div > ul").children("li").find("div > ul").children("li");
        i = 0;
        $TypeList.each(function(index){
            // The first matched <li> is intentionally skipped, as in the original flag logic.
            if(index === 0){
                return;
            }

            const $link = $(this).find('a');
            const href = $link.attr('href');

            theaterData[i] = {
                LocateUrl : href,
                LocateName : $link.text(),
                // Strip the page prefix and join the remaining query values with "|".
                LocateQuery : href
                    .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/Detail?divisionCode=","")
                    .replace("https://www.lottecinema.co.kr/NLCHS/Cinema/SpecialCinema?divisionCode=","")
                    .replace("&detailDivisionCode=","|").replace("&cinemaID=","|").replace("&screendivcd=","|"),
            };
            i += 1;
        });
    } finally {
        // Release the browser process whether or not the crawl succeeded.
        await browser.close();
    }

    console.log("Completed!");
})().catch((err) => {
    console.error("LotteCinema startup crawl failed:", err);
});
79 | + | ||
80 | + | ||
/**
 * Handler for GET /LotteCinema.
 * Responds with the current contents of the module-level `movieData` array.
 */
async function sendMovieData(_req, res, _next) {
    res.send(movieData);
}

app.get('/LotteCinema', asyncHandler(sendMovieData));
86 | + | ||
/**
 * Handler for GET /LotteCinema/theater.
 * Responds with the current contents of the module-level `theaterData` array.
 */
async function sendTheaterData(_req, res, _next) {
    res.send(theaterData);
}

app.get('/LotteCinema/theater', asyncHandler(sendTheaterData));
92 | + | ||
/**
 * GET /LotteCinema/GetPlayingMovie
 *
 * Builds a ticketing URL from the first crawled movie and the last crawled
 * theater whose name contains the hard-coded theater name, loads that page in
 * a headless browser, and responds with [{ rank, title }] for every list item
 * whose class is not "disabled".
 *
 * If the startup crawl has not produced usable movie/theater data yet, responds
 * with a "please wait" message and returns without launching a browser.
 */
app.get('/LotteCinema/GetPlayingMovie', asyncHandler(async (req, res, next) => {

    // Declared locally; the original assigned these without a declaration,
    // leaking them as implicit globals.
    const testTheaterName = "판교";
    const testDate = "2022-05-30";
    let PlayingMovieURL;

    // Guard against an empty movie list so movieData[0].url cannot throw.
    if(movieData.length > 0){
        theaterData.forEach((val) => {
            // When several theaters match, the last match wins (as before).
            if(val.LocateName.includes(testTheaterName)){
                PlayingMovieURL = `${movieData[0].url}&screenCd=${val.LocateQuery}` +
                                  `&screenName=${val.LocateName}` +
                                  `&releaseDate=${testDate}`;
            }
        });
    }

    if(!PlayingMovieURL){
        // Return right after responding: sending a second response would throw
        // because the headers have already been sent.
        res.send("Please wait until get Movie and Theater information!");
        console.log("Please wait until get Movie and Theater information!");
        return;
    }

    const playingMovieData = [];

    // Launch the browser only once we know there is a page to visit.
    const browser1 = await puppeteer.launch();
    try {
        const page1 = await browser1.newPage();
        await page1.goto(PlayingMovieURL);

        const content = await page1.content();
        const $ = cheerio.load(content, {decodeEntities: true});

        const $AbleList = $("#mCSB_9_container > ul").children("li");

        $AbleList.each(function(){
            if($(this).attr("class") !== "disabled"){
                playingMovieData.push({
                    // rank is 1-based and counts only the entries that were kept.
                    rank : playingMovieData.length + 1,
                    title : $(this).find("a > div.group_infor > div > strong").text(),
                });
            }
        });
    } finally {
        // Always release the browser, even if navigation or parsing failed.
        await browser1.close();
    }

    res.send(playingMovieData);
}));
142 | + | ||
143 | +let server = app.listen(80); |
LotteCinema/package.json
0 → 100644
1 | +{ | ||
2 | + "name": "Crawling LotteCinema Site", | ||
3 | + "version": "1.0.0", | ||
4 | + "description": "", | ||
5 | + "main": "index.js", | ||
6 | + "scripts": { | ||
7 | + "test": "echo \"Error: no test specified\" && exit 1" | ||
8 | + }, | ||
9 | + "author": "", | ||
10 | + "license": "ISC", | ||
11 | + "dependencies": { | ||
12 | + "body-parser": "^1.17.1", | ||
13 | + "express": "^4.15.2", | ||
14 | + "cheerio": "^0.22.0", | ||
15 | + "puppeteer": "^14.1.0", | ||
16 | + "express-async-handler": "^1.2.0" | ||
17 | + } | ||
18 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
package-lock.json
0 → 100644
This diff could not be displayed because it is too large.
package.json
0 → 100644
1 | +{ | ||
2 | + "dependencies": { | ||
3 | + "axios": "^0.27.2", | ||
4 | + "body-parser": "^1.20.0", | ||
5 | + "cheerio": "^1.0.0-rc.10", | ||
6 | + "dom-parser": "^0.1.6", | ||
7 | + "ejs": "^3.1.7", | ||
8 | + "express": "^4.17.3", | ||
9 | + "express-async-handler": "^1.2.0", | ||
10 | + "express-session": "^1.17.2", | ||
11 | + "puppeteer": "^14.1.0", | ||
12 | + "request": "^2.88.2", | ||
13 | + "sanitize-html": "^2.7.0" | ||
14 | + } | ||
15 | +} |
-
Please register or log in to post a comment