Merge branch 'feature/Megabox_Crawling' into 'master'
Feature/megabox crawling: first merge from feature/Megabox_Crawling into master. See merge request !18.
Showing
2 changed files
with
165 additions
and
0 deletions
Megabox_crawling/app.js
0 → 100644
1 | +const request = require('request'); | ||
2 | +const cheerio = require('cheerio'); | ||
3 | +const puppeteer = require('puppeteer'); | ||
4 | + | ||
5 | +const {Builder,until} = require('selenium-webdriver'); //모듈 불러오기 | ||
6 | +var webdriver = require('selenium-webdriver'); | ||
7 | +var By = webdriver.By; | ||
8 | +const chrome = require('selenium-webdriver/chrome');//크롬 사용시 | ||
9 | + | ||
10 | +const async = require('async') | ||
11 | +let express = require('express'); | ||
12 | +let app = express(); | ||
13 | +let bodyParser = require('body-parser'); | ||
14 | +const { timeout } = require('async'); | ||
// Register request-body parsing ahead of every route: URL-encoded form
// payloads first, then JSON payloads.
const formBodyParser = bodyParser.urlencoded({ extended: false });
const jsonBodyParser = bodyParser.json();
app.use(formBodyParser);
app.use(jsonBodyParser);
17 | + | ||
18 | + | ||
// Megabox pages visited by the crawler.
const booking_url = 'https://megabox.co.kr/booking';
const rate_url = 'https://www.megabox.co.kr/movie';

// Crawl results shared between the start-up crawl and the HTTP routes.
// movie_data entries carry rank / title / movie_num (plus rate and running
// once those are filled in); location_data entries carry LocationName /
// LocationNUm.
let movie_data = [];
let location_data = [];

// Module-level cursors used while filling the two arrays above.
let r = 0;
let index = 0;
26 | + | ||
27 | + | ||
// Ids of the `#mCSB_<id>_container` lists on the booking page, one per region:
// Seoul, Gyeonggi, Incheon, Daejeon/Chungcheong/Sejong,
// Busan/Daegu/Gyeongsang, Gwangju/Jeolla, Gangwon.
const REGION_CONTAINER_IDS = [4, 5, 6, 7, 8, 9, 10];

// Rate stored for a movie that does not appear on the booking-rate page.
const DEFAULT_RATE = '예매율 0%';

/**
 * Crawls the booking page with headless Chrome and fills `location_data`
 * (theater name / branch number) and `movie_data` (rank / title / movie number).
 *
 * @returns {Promise<void>} resolves once both arrays are populated
 * @throws rejects with whatever WebDriver error interrupted the crawl
 */
async function crawlBookingPage() {
    const driver = new webdriver.Builder()
        .forBrowser('chrome')
        .setChromeOptions(new chrome.Options().headless())
        .build();

    try {
        // Every WebDriver command returns a promise; await each one so the
        // commands run in order and failures surface here.
        await driver.get(booking_url);
        await driver.switchTo().frame(0); // the frameBokdMBooking frame

        const branches = [];
        for (const id of REGION_CONTAINER_IDS) {
            const buttons = await driver.wait(
                until.elementsLocated(By.css(`#mCSB_${id}_container>ul>li>#btn`))
            );
            for (const button of buttons) {
                branches.push({
                    'LocationName': await button.getAttribute('brch-nm'),
                    'LocationNUm': await button.getAttribute('brch-no'),
                });
            }
        }

        const movieButtons = await driver.wait(
            until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn'))
        );
        const movies = [];
        for (const button of movieButtons) {
            movies.push({
                'rank': movies.length + 1, // 1-based position in the list
                'title': await button.getAttribute('movie-nm'),
                'movie_num': await button.getAttribute('movie-no'),
            });
        }

        // Publish only after the whole page was read, mutating the shared
        // arrays in place so the routes keep seeing the same objects.
        location_data.push(...branches);
        movie_data.length = 0;
        movie_data.push(...movies);
    } finally {
        // quit() ends the session and the chromedriver process even when the
        // crawl failed; close() would only close the current window.
        await driver.quit();
    }
}

/**
 * Loads the movie page with Puppeteer and stores a `rate` on every entry of
 * `movie_data`, matched by title. Movies missing from the page get
 * DEFAULT_RATE.
 *
 * @returns {Promise<void>} resolves once every movie has a `rate`
 * @throws rejects with whatever Puppeteer error interrupted the crawl
 */
async function crawlBookingRates() {
    const browser = await puppeteer.launch({
        headless: true
    });

    let content;
    try {
        const page = await browser.newPage();
        await page.goto(rate_url);
        content = await page.content();
    } finally {
        await browser.close();
    }

    const $ = cheerio.load(content);
    const rateByTitle = new Map();
    $('ol.list>li').each((position, list) => {
        const name = $(list).find('div.tit-area > p.tit').attr('title');
        const rate = $(list).find('div.rate-date > span.rate').text();
        // Keep the first occurrence if a title is listed more than once.
        if (name !== undefined && !rateByTitle.has(name)) {
            rateByTitle.set(name, rate);
        }
    });

    // Look the rate up by title instead of walking both lists with one shared
    // cursor, which ran past the end of movie_data and put the default rate on
    // the wrong entries.
    for (const movie of movie_data) {
        movie['rate'] = rateByTitle.has(movie.title)
            ? rateByTitle.get(movie.title)
            : DEFAULT_RATE;
    }
}

// Run the two crawls strictly one after the other: the rate crawl needs the
// movie list produced by the booking-page crawl.
async.waterfall([
    crawlBookingPage,
    crawlBookingRates,
], (err) => {
    if (err) {
        // Without this callback a failed crawl would go unreported and the
        // routes would silently serve empty data.
        console.error('Megabox start-up crawl failed:', err);
    }
});
102 | + | ||
103 | + | ||
// Most recent selection received through POST /Megabox: the requested date and
// the branch number of the requested theater.
let userData = { Date: '', location: '' };
108 | +// const _sleep = (delay) => new Promise((resolve) => setTimeout(resolve, delay)); | ||
/**
 * GET /Megabox - responds with the current contents of `movie_data`.
 */
app.get('/Megabox', function sendMovieData(req, res) {
    res.send(movie_data);
});
114 | + | ||
// Headless Chrome session created once at start-up and reused by the
// POST /Megabox and GET /Megabox/GetPlayingMovie routes.
const appdriverOptions = new chrome.Options().headless();
const appdriver = new webdriver.Builder()
    .forBrowser('chrome')
    .setChromeOptions(appdriverOptions)
    .build();
116 | + | ||
/**
 * POST /Megabox - receives `Date` and `location` (theater name) in the request
 * body, stores them in `userData`, and points the shared `appdriver` at the
 * booking page for that theater and date.
 *
 * Responses:
 *   200 - `movie_data`, sent once the booking page has been loaded
 *   400 - `Date` or `location` is missing from the body
 *   404 - `location` does not match any crawled theater name
 * WebDriver failures are forwarded to the Express error handler.
 *
 * NOTE(review): `appdriver` is a single browser session shared by all clients,
 * so overlapping requests will navigate over each other.
 */
app.post('/Megabox', async (req, res, next) => {
    const { Date: playDate, location: locationName } = req.body;

    if (playDate == null || playDate === '' || locationName == null || locationName === '') {
        res.status(400).send({ error: 'Date and location are required' });
        return;
    }

    const branch = location_data.find(
        (entry) => entry['LocationName'] === String(locationName)
    );
    if (branch === undefined) {
        // Previously an unknown theater silently reused the branch number left
        // over from the last request.
        res.status(404).send({ error: 'Unknown location' });
        return;
    }

    userData['Date'] = playDate;
    userData['location'] = branch['LocationNUm'];

    // Build the query string through URL so user input is percent-encoded.
    const playingMovieUrl = new URL(booking_url);
    playingMovieUrl.searchParams.set('brchNo1', userData['location']);
    playingMovieUrl.searchParams.set('playDe', String(userData['Date']));

    try {
        // Await navigation so the page is ready before the client is told to
        // go on to /Megabox/GetPlayingMovie.
        await appdriver.get(playingMovieUrl.toString());
        await appdriver.switchTo().frame(0); // the frameBokdMBooking frame
        res.send(movie_data);
    } catch (err) {
        next(err);
    }
});
133 | + | ||
/**
 * GET /Megabox/GetPlayingMovie - reads the movie list of the page currently
 * loaded in `appdriver` and copies each button's `form-at` attribute into the
 * `running` field of the matching `movie_data` entry, then responds with
 * `movie_data`.
 *
 * Entries are matched by movie number (`movie-no` attribute against
 * `movie_num`) rather than by list position, so a page that lists more movies
 * than the start-up crawl, or lists them in another order, can no longer
 * index past the end of `movie_data` or tag the wrong movie. Movies absent
 * from the page are left untouched.
 *
 * Failures, including the list not appearing within the wait timeout, are
 * forwarded to the Express error handler.
 */
app.get('/Megabox/GetPlayingMovie', async (req, res, next) => {
    // Upper bound for the movie list to appear; without it the request hangs
    // forever when no booking page has been loaded yet.
    const LIST_WAIT_MS = 10000;

    try {
        const buttons = await appdriver.wait(
            until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')),
            LIST_WAIT_MS
        );

        const movieByNumber = new Map(
            movie_data.map((movie) => [movie['movie_num'], movie])
        );
        for (const button of buttons) {
            const movie = movieByNumber.get(await button.getAttribute('movie-no'));
            if (movie !== undefined) {
                movie['running'] = await button.getAttribute('form-at');
            }
        }

        res.send(movie_data);
    } catch (err) {
        // Express 4 does not catch rejections from async handlers by itself.
        next(err);
    }
});
144 | + | ||
// TCP port the HTTP API listens on.
const LISTEN_PORT = 23023;
app.listen(LISTEN_PORT);
... | \ No newline at end of file | ... | \ No newline at end of file |
Megabox_crawling/package.json
0 → 100644
1 | +{ | ||
2 | + "name": "me", | ||
3 | + "version": "1.0.0", | ||
4 | + "description": "", | ||
5 | + "main": "app.js", | ||
6 | + "scripts": { | ||
7 | + "test": "echo \"Error: no test specified\" && exit 1" | ||
8 | + }, | ||
9 | + "keywords": [], | ||
10 | + "author": "", | ||
11 | + "license": "ISC", | ||
12 | + "dependencies": { | ||
13 | + "async": "^3.2.3", | ||
14 | + "body-parser": "^1.20.0", | ||
15 | + "cheerio": "^1.0.0-rc.11", | ||
16 | + "express": "^4.18.1", | ||
17 | + "puppeteer": "^14.1.1", | ||
18 | + "selenium-webdriver": "^4.1.2" | ||
19 | + } | ||
20 | +} |
-
Please register or login to post a comment