이혜인

crawling all info, including locations and movies

1 const request = require('request'); 1 const request = require('request');
2 const cheerio = require('cheerio'); 2 const cheerio = require('cheerio');
3 const puppeteer = require('puppeteer'); 3 const puppeteer = require('puppeteer');
4 -const {Builder,Key,until} = require('selenium-webdriver'); //모듈 불러오기 4 +
5 +const {Builder,until} = require('selenium-webdriver'); //모듈 불러오기
5 var webdriver = require('selenium-webdriver'); 6 var webdriver = require('selenium-webdriver');
6 var By = webdriver.By; 7 var By = webdriver.By;
7 const chrome = require('selenium-webdriver/chrome');//크롬 사용시 8 const chrome = require('selenium-webdriver/chrome');//크롬 사용시
8 9
10 +const async = require('async')
9 11
10 let booking_url = "https://megabox.co.kr/booking"; 12 let booking_url = "https://megabox.co.kr/booking";
11 const rate_url = "https://www.megabox.co.kr/movie"; 13 const rate_url = "https://www.megabox.co.kr/movie";
12 -// var booking_options = { encoding: "utf-8", method: "GET", uri: booking_url};
13 14
14 let r =0; 15 let r =0;
15 let movie_data = []; 16 let movie_data = [];
17 +let location_data = [];
18 +let index = 0;
16 19
20 +async.waterfall([
21 + async () => {
17 22
18 -(async () => { 23 + const driver = new webdriver.Builder().forBrowser('chrome').build();//.setChromeOptions(new chrome.Options().headless())
19 - 24 + driver.get(booking_url);
20 - const driver = new webdriver.Builder().forBrowser('chrome').build(); 25 + driver.switchTo().frame(0)//frameBokdMBooking 프레임 가져옴
21 - driver.get(booking_url);
22 - driver.switchTo().frame(0)//frameBokdMBooking 프레임 가져옴
23 -
24 - let list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')));
25 - r = 0;
26 - for (item of list) {
27 - //Using getAttribute to get the data
28 - movie_data[r++] = {
29 - 'rank' : r,
30 - 'title' : await item.getAttribute("movie-nm"),
31 - 'movie_num':await item.getAttribute("movie-no"),
32 - 'running':await item.getAttribute("form-at"),
33 - }
34 - }
35 - driver.close();
36 -
37 - r = 0;
38 - const browser = await puppeteer.launch({
39 - headless: true
40 - });
41 - const page = await browser.newPage();
42 - await page.goto(rate_url);
43 - const content = await page.content();
44 -
45 - const $ = cheerio.load(content);
46 - const $rate_lists = $("ol.list>li");
47 - $rate_lists.each((index, list) => {
48 - const name = $(list).find('div.tit-area > p.tit').attr('title');
49 - const rate = $(list).find('div.rate-date > span.rate').text();
50 26
51 - if(movie_data[r].title === name){ 27 + let seoul = await driver.wait(until.elementsLocated(By.css('#mCSB_4_container>ul>li>#btn')));
52 - movie_data[r]['rate'] = rate; 28 + let Gyeonggi = await driver.wait(until.elementsLocated(By.css('#mCSB_5_container>ul>li>#btn')));
53 - }else{ 29 + const Incheon = await driver.wait(until.elementsLocated(By.css('#mCSB_6_container>ul>li>#btn')));
54 - movie_data[r]['rate'] = '예매율 0.0%'; 30 + const DCS = await driver.wait(until.elementsLocated(By.css('#mCSB_7_container>ul>li>#btn')));//Daejeon Chungcheong Sejong
31 + const BDG = await driver.wait(until.elementsLocated(By.css('#mCSB_8_container>ul>li>#btn')));//Busan Daegu Gyeongsang
32 + const GJ= await driver.wait(until.elementsLocated(By.css('#mCSB_9_container>ul>li>#btn')));//gwangju_jeonla
33 + const Gangwon = await driver.wait(until.elementsLocated(By.css('#mCSB_10_container>ul>li>#btn')));
34 + const location_list = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon]//
35 + for(let i = 0; i < location_list.length; i++){
36 + for (item of location_list[i]) {
37 + let location_name = await item.getAttribute("brch-nm");
38 + let location_num = await item.getAttribute("brch-no");
39 + let obj = {};
40 + obj[location_name]= location_num
41 + console.log(obj)
42 + location_data[index++] = obj;
43 +
44 + }
55 } 45 }
56 - });
57 - r = 0;
58 - for(i of movie_data){
59 - console.log(i);
60 - }
61 -
62 - browser.close();
63 -})();
64 -
65 -// (async () => {
66 -
67 -
68 -// })();
69 -
70 -
71 46
47 + let movie_list = await driver.wait(until.elementsLocated(By.css('#mCSB_1_container>ul>li>.btn')));
48 + r = 0;
49 + for (item of movie_list) {
50 + //Using getAttribute to get the data
51 + movie_data[r++] = {
52 + 'rank' : r,
53 + 'title' : await item.getAttribute("movie-nm"),
54 + 'movie_num':await item.getAttribute("movie-no"),
55 + 'running':await item.getAttribute("form-at"),
56 + }
57 + }
58 +
59 + driver.close();
60 +
61 + },
72 62
63 + async () => {
73 64
65 + r = 0;
66 + const browser = await puppeteer.launch({
67 + headless: true
68 + });
69 + const page = await browser.newPage();
70 + await page.goto(rate_url);
71 + const content = await page.content();
72 +
73 + const $ = cheerio.load(content);
74 + const $rate_lists = $("ol.list>li");
75 + $rate_lists.each((index, list) => {
76 + const name = $(list).find('div.tit-area > p.tit').attr('title');
77 + const rate = $(list).find('div.rate-date > span.rate').text();
78 +
79 + if(movie_data[r].title === name){
80 + movie_data[r++]['rate'] = rate;
81 + }
82 + });
83 + for(i of movie_data){
84 + if(Object.keys(i).length==4){
85 + movie_data[r++]['rate'] = '예매율 0%';
86 + }
87 + }
88 + for(i of location_data){
89 + console.log(i['동탄'])
90 + }
91 + browser.close();
92 + }
93 +])
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
10 "author": "", 10 "author": "",
11 "license": "ISC", 11 "license": "ISC",
12 "dependencies": { 12 "dependencies": {
13 + "async": "^3.2.3",
13 "body-parser": "^1.20.0", 14 "body-parser": "^1.20.0",
14 "cheerio": "^1.0.0-rc.11", 15 "cheerio": "^1.0.0-rc.11",
15 "express": "^4.18.1", 16 "express": "^4.18.1",
......