Showing
3 changed files
with
65 additions
and
75 deletions
Megabox_crawling/app.js
0 → 100644
| 1 | +const request = require('request'); | ||
| 2 | +const cheerio = require('cheerio'); | ||
| 3 | +const puppeteer = require('puppeteer'); | ||
| 4 | +const {Builder,Key,until} = require('selenium-webdriver'); //load the selenium-webdriver helpers | ||
| 5 | +var webdriver = require('selenium-webdriver'); | ||
| 6 | +var By = webdriver.By; | ||
| 7 | +const chrome = require('selenium-webdriver/chrome');//needed when driving Chrome | ||
| 8 | + | ||
| 9 | + | ||
| 10 | +let booking_url = "https://megabox.co.kr/booking"; | ||
| 11 | +const rate_url = "https://www.megabox.co.kr/movie"; | ||
| 12 | +// var booking_options = { encoding: "utf-8", method: "GET", uri: booking_url}; | ||
| 13 | + | ||
| 14 | +let r =0; | ||
| 15 | +let movie_data = []; | ||
| 16 | + | ||
| 17 | + | ||
| 18 | +(async () => { | ||
| 19 | + | ||
| 20 | + r = 0; | ||
| 21 | + const browser = await puppeteer.launch({ | ||
| 22 | + headless: true | ||
| 23 | + }); | ||
| 24 | + const page = await browser.newPage(); | ||
| 25 | + await page.goto(rate_url); | ||
| 26 | + const content = await page.content(); | ||
| 27 | + | ||
| 28 | + const $ = cheerio.load(content); | ||
| 29 | + const $rate_lists = $("ol.list>li"); | ||
| 30 | + $rate_lists.each((index, list) => { | ||
| 31 | + const name = $(list).find('div.tit-area > p.tit').attr('title'); | ||
| 32 | + const rate = $(list).find('div.rate-date > span.rate').text(); | ||
| 33 | + | ||
| 34 | + }); | ||
| 35 | + r = 0; | ||
| 36 | + for(i of movie_data){ | ||
| 37 | + console.log(i); | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + browser.close(); | ||
| 41 | +})(); | ||
| 42 | + | ||
| 43 | + | ||
| 44 | + | ||
| 45 | + | ||
| 46 | + |
Megabox_crawling/megaboxCrawling.py
deleted
100644 → 0
| 1 | -from bs4 import BeautifulSoup | ||
| 2 | -from selenium import webdriver | ||
| 3 | -import chromedriver_autoinstaller | ||
| 4 | - | ||
| 5 | -chromedriver_autoinstaller.install() | ||
| 6 | - | ||
| 7 | -booking_url = "https://megabox.co.kr/booking" | ||
| 8 | -rate_url = "https://www.megabox.co.kr/movie" | ||
| 9 | - | ||
| 10 | -options = webdriver.ChromeOptions() | ||
| 11 | -options.add_argument("headless") #do not open a browser window | ||
| 12 | -options.add_experimental_option("excludeSwitches", ["enable-logging"]) | ||
| 13 | - | ||
| 14 | -driver = webdriver.Chrome(options = options) | ||
| 15 | -driver.maximize_window() | ||
| 16 | -# driver.implicitly_wait(2) | ||
| 17 | -driver.get(booking_url) | ||
| 18 | - | ||
| 19 | -driver2=webdriver.Chrome(options = options) | ||
| 20 | -driver2.maximize_window() | ||
| 21 | -# driver2.implicitly_wait(2) | ||
| 22 | -driver2.get(rate_url) | ||
| 23 | - | ||
| 24 | - | ||
| 25 | -theater_location = dict() | ||
| 26 | - | ||
| 27 | -# iframes = driver.find_elements_by_css_selector('iframe') | ||
| 28 | -driver.switch_to.frame('frameBokdMBooking') | ||
| 29 | -page1 = driver.page_source | ||
| 30 | -soup1 = BeautifulSoup(page1, "html.parser") | ||
| 31 | - | ||
| 32 | -seoul = soup1.select("#mCSB_4_container>ul>li>button") | ||
| 33 | -Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button") | ||
| 34 | -Incheon = soup1.select("#mCSB_6_container>ul>li>button") | ||
| 35 | -DCS = soup1.select("#mCSB_7_container>ul>li>button")#Daejeon, Chungcheong, Sejong | ||
| 36 | -BDG = soup1.select("#mCSB_8_container>ul>li>button")#Busan, Daegu, Gyeongsang | ||
| 37 | -GJ= soup1.select("#mCSB_9_container>ul>li>button")#Gwangju, Jeolla | ||
| 38 | -Gangwon = soup1.select("#mCSB_10_container>ul>li>button") | ||
| 39 | - | ||
| 40 | -loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon] | ||
| 41 | - | ||
| 42 | -def get_location_code(location): | ||
| 43 | - for brch in location: | ||
| 44 | - theater_location[brch['brch-nm']] = brch['brch-no'] | ||
| 45 | - | ||
| 46 | - | ||
| 47 | -for parameter in loc: | ||
| 48 | - get_location_code(parameter) | ||
| 49 | - | ||
| 50 | - | ||
| 51 | -page2 = driver2.page_source | ||
| 52 | -soup2 = BeautifulSoup(page2, "html.parser") | ||
| 53 | -ticketing_rate = soup2.select('.rate') | ||
| 54 | -movie_name = soup2.select('.tit-area > p.tit') | ||
| 55 | -get_movie_info = soup1.select("#mCSB_1_container>ul>li>button") | ||
| 56 | - | ||
| 57 | -movie_dict = dict() | ||
| 58 | -rank = 1 | ||
| 59 | - | ||
| 60 | -for movie in get_movie_info: | ||
| 61 | - movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']] | ||
| 62 | - | ||
| 63 | -for r, m in zip(ticketing_rate, movie_name): | ||
| 64 | - movie_dict[m['title']].append(r.string) | ||
| 65 | - | ||
| 66 | -for value in movie_dict.values(): | ||
| 67 | - if(len(value) == 2): | ||
| 68 | - value.append("예메율 0.0%") | ||
| 69 | - if(rank<=10): | ||
| 70 | - value.append({'rank' : rank}) | ||
| 71 | - rank += 1 | ||
| 72 | - | ||
| 73 | -#form-at: on the initial fetch, tells whether the movie is showing on that date (regardless of location) | ||
| 74 | -#form-at still needs to be checked after querying with brch-no | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
Megabox_crawling/package.json
0 → 100644
| 1 | +{ | ||
| 2 | + "name": "me", | ||
| 3 | + "version": "1.0.0", | ||
| 4 | + "description": "", | ||
| 5 | + "main": "app.js", | ||
| 6 | + "scripts": { | ||
| 7 | + "test": "echo \"Error: no test specified\" && exit 1" | ||
| 8 | + }, | ||
| 9 | + "keywords": [], | ||
| 10 | + "author": "", | ||
| 11 | + "license": "ISC", | ||
| 12 | + "dependencies": { | ||
| 13 | + "body-parser": "^1.20.0", | ||
| 14 | + "cheerio": "^1.0.0-rc.11", | ||
| 15 | + "express": "^4.18.1", | ||
| 16 | + "puppeteer": "^14.1.1", | ||
| 17 | + "selenium-webdriver": "^4.1.2" | ||
| 18 | + } | ||
| 19 | +} |
-
Please register or login to post a comment