Showing 12 changed files with 0 additions and 234 deletions
WebCrawling/.idea/.gitignore
deleted
100644 → 0
WebCrawling/.idea/misc.xml
deleted
100644 → 0
| 1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
| 2 | -<project version="4"> | ||
| 3 | - <component name="ProjectRootManager" version="2" languageLevel="JDK_15" default="true" project-jdk-name="15" project-jdk-type="JavaSDK"> | ||
| 4 | - <output url="file://$PROJECT_DIR$/out" /> | ||
| 5 | - </component> | ||
| 6 | -</project> | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
WebCrawling/.idea/modules.xml
deleted
100644 → 0
| 1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
| 2 | -<project version="4"> | ||
| 3 | - <component name="ProjectModuleManager"> | ||
| 4 | - <modules> | ||
| 5 | - <module fileurl="file://$PROJECT_DIR$/WebCrawling.iml" filepath="$PROJECT_DIR$/WebCrawling.iml" /> | ||
| 6 | - </modules> | ||
| 7 | - </component> | ||
| 8 | -</project> | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
WebCrawling/.idea/vcs.xml
deleted
100644 → 0
WebCrawling/WebCrawling.iml
deleted
100644 → 0
| 1 | -<?xml version="1.0" encoding="UTF-8"?> | ||
| 2 | -<module type="JAVA_MODULE" version="4"> | ||
| 3 | - <component name="NewModuleRootManager" inherit-compiler-output="true"> | ||
| 4 | - <exclude-output /> | ||
| 5 | - <content url="file://$MODULE_DIR$"> | ||
| 6 | - <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" /> | ||
| 7 | - </content> | ||
| 8 | - <orderEntry type="inheritedJdk" /> | ||
| 9 | - <orderEntry type="sourceFolder" forTests="false" /> | ||
| 10 | - <orderEntry type="library" name="jsoup-1.15.1" level="project" /> | ||
| 11 | - </component> | ||
| 12 | -</module> | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
WebCrawling/chromedriver
deleted
100755 → 0
This file is too large to display.
WebCrawling/jsoup-1.15.1.jar
deleted
100644 → 0
No preview for this file type
No preview for this file type
No preview for this file type
WebCrawling/src/CGVExample.java
deleted
100644 → 0
This diff is collapsed. Click to expand it.
WebCrawling/src/CGVTicketing.js
deleted
100644 → 0
| 1 | -require('chromedriver'); | ||
| 2 | -const request = require('request'); | ||
| 3 | -const cheerio = require('cheerio'); | ||
| 4 | -const puppeteer = require('puppeteer'); | ||
| 5 | - | ||
| 6 | -const async = require('async'); | ||
| 7 | -let express = require('express'); | ||
| 8 | -let app = express(); | ||
| 9 | -let bodyParser = require('body-parser'); | ||
| 10 | -const { timeout } = require('async'); | ||
| 11 | - | ||
| 12 | -const {Builder,until} = require('selenium-webdriver'); //모듈 불러오기 | ||
| 13 | -const webdriver = require('selenium-webdriver'); | ||
| 14 | -const chrome = require('selenium-webdriver/chrome'); | ||
| 15 | -const { delayed } = require('selenium-webdriver/lib/promise'); | ||
| 16 | -const By = webdriver.By; | ||
| 17 | - | ||
| 18 | -app.use(bodyParser.urlencoded({ extended: false })); | ||
| 19 | -app.use(bodyParser.json()); | ||
| 20 | - | ||
| 21 | -const url_movies = "https://www.cgv.co.kr/movies/?lt=1&ft=0"; //끝의 쿼리 0은 개봉 전 영화도 포함하는 것. 예매율 순위 가져오기 | ||
| 22 | -const url_theaters = "https://www.cgv.co.kr/theaters"; //영화관 정보 가져오는 링크. | ||
| 23 | -const url_ticketing = "https://www.cgv.co.kr/ticket/"; //상영중인 영화 정보 가져오는 링크. | ||
| 24 | - | ||
| 25 | -let cgv_theaters = []; //영화관과 영화관 고유 코드를 담는 배열 | ||
| 26 | -let cgv_movies = []; //예매율 상위 19위까지의 영화 정보(CGVMovieInfo Class의 인스턴스)들을 담는 배열. | ||
| 27 | -let cgv_accessible_movies = []; //선택한 일자, 영화관에서 예매할 수 있는 영화 이름과 영화 고유 코드를 담는 배열. | ||
| 28 | - | ||
| 29 | -class CGVMovieInfo { | ||
| 30 | - constructor(title, rank, score, GoldenEgg, movieCode){ | ||
| 31 | - this.title = title; | ||
| 32 | - this.rank = rank; | ||
| 33 | - this.score = score; | ||
| 34 | - this.GoldenEgg = GoldenEgg; | ||
| 35 | - this.movieCode = movieCode; | ||
| 36 | - } | ||
| 37 | - | ||
| 38 | - getTitle() { return this.title; } | ||
| 39 | - setTitle(title) { this.title = title; } | ||
| 40 | - getRank() { return this.rank; } | ||
| 41 | - setRank(rank) { this.rank = rank; } | ||
| 42 | - getScore() { return this.score; } | ||
| 43 | - setScore(score) { this.score = score; } | ||
| 44 | - getGoldenEgg() { return this.GoldenEgg; } | ||
| 45 | - setGoldenEgg(GoldenEgg) { this.GoldenEgg = GoldenEgg; } | ||
| 46 | - getMovieCode() { return this.movieCode; } | ||
| 47 | - setMovieCode(movieCode) { this.movieCode = movieCode; } | ||
| 48 | - | ||
| 49 | - printMovieInfo(){ | ||
| 50 | - return { | ||
| 51 | - 'rank': this.rank + " : " + this.title, | ||
| 52 | - 'score': "예매율 : " + this.score + "%", | ||
| 53 | - 'goldenEgg': "골든에그지수 : " + this.GoldenEgg, | ||
| 54 | - 'movieCode': "영화코드 : " + this.movieCode | ||
| 55 | - }; | ||
| 56 | - } | ||
| 57 | - | ||
| 58 | -} | ||
| 59 | - | ||
| 60 | -async.waterfall([ | ||
| 61 | - async () => { | ||
| 62 | - //크롬 설정을 담은 객체 생성 | ||
| 63 | - const driver_theaters = new webdriver.Builder().forBrowser('chrome').setChromeOptions(new chrome.Options().headless()).build(); | ||
| 64 | - driver_theaters.get(url_theaters); | ||
| 65 | - //영화관 및 영화관에 대응되는 영화관별 고유 코드 가져오기. | ||
| 66 | - let selector = '#contents > div.sect-common > div > div.sect-city > ul > li:nth-child({}) > div > ul > li > a'; | ||
| 67 | - let area = []; | ||
| 68 | - for(let i = 1; i <= 9; i++){ | ||
| 69 | - let region = await driver_theaters.wait(until.elementsLocated(By.css(selector.replace("{}", i)))); | ||
| 70 | - area.push(region); | ||
| 71 | - } | ||
| 72 | - for (const theaters_by_area of area) { | ||
| 73 | - let theaters_info_by_area = []; | ||
| 74 | - for (const theater of theaters_by_area){ | ||
| 75 | - let theater_info = { | ||
| 76 | - "theater_name" : await theater.getAttribute('title'), | ||
| 77 | - "theater_code" : await theater.getAttribute('href') | ||
| 78 | - }; | ||
| 79 | - theater_info.theater_name = theater_info.theater_name.replace("CGV", "") | ||
| 80 | - theater_info.theater_code = theater_info.theater_code.replace(/(.+(?<=theaterCode=))|(.+(?<=theatercode=))/, "").substring(0,4); | ||
| 81 | - theaters_info_by_area.push(theater_info); | ||
| 82 | - } | ||
| 83 | - cgv_theaters.push(theaters_info_by_area); | ||
| 84 | - } | ||
| 85 | - driver_theaters.close(); | ||
| 86 | - }, | ||
| 87 | - async () => { | ||
| 88 | - const driver_movies = new webdriver.Builder().forBrowser('chrome').setChromeOptions(new chrome.Options().headless()).build(); | ||
| 89 | - driver_movies.get(url_movies); | ||
| 90 | - //예매율 Top19까지의 영화의 정보를 가져옴. | ||
| 91 | - | ||
| 92 | - const rank = await driver_movies.wait(until.elementsLocated(By.css("strong.rank"))); | ||
| 93 | - const title = await driver_movies.wait(until.elementsLocated(By.css("strong.title"))); | ||
| 94 | - const score = await driver_movies.wait(until.elementsLocated(By.css("strong.percent"))); | ||
| 95 | - const GoldenEgg = await driver_movies.wait(until.elementsLocated(By.css("span.percent"))); | ||
| 96 | - const link = await driver_movies.wait(until.elementsLocated(By.css("a.link-reservation"))); | ||
| 97 | - | ||
| 98 | - //영화 제목, 순위, 예매율, 영화 코드, 골든에그 지수를 가져와 CGVMovieInfo 객체 생성자에 파라미터로 넘겨주고, 인스턴스를 받아옴. | ||
| 99 | - for (let i = 0; i < rank.length; i++) { | ||
| 100 | - const newTitle = await title[i].getText(); | ||
| 101 | - const newRank = await rank[i].getText(); | ||
| 102 | - const newScore = await score[i].getText(); | ||
| 103 | - const newCode = await link[i].getAttribute("href"); | ||
| 104 | - const newMovie = new CGVMovieInfo(newTitle, parseInt(newRank.replace("No.", "")), newScore.replace("예매율", "").replace("%", ""), await GoldenEgg[i].getText(), newCode.replace(/[^0-9]/g, "").substring(0,8)); | ||
| 105 | - cgv_movies.push(newMovie); | ||
| 106 | - } | ||
| 107 | - driver_movies.close(); | ||
| 108 | - } | ||
| 109 | -]) | ||
| 110 | - | ||
| 111 | -app.get('/cgv_theaters', (req, res) => { | ||
| 112 | - res.send(cgv_theaters[0]); | ||
| 113 | -}); | ||
| 114 | - | ||
| 115 | -app.post('/ticketing', async (req, res, next) => { | ||
| 116 | - //영화관 이름과 날짜를 가져옴. | ||
| 117 | - const theaterName = req.body.theaterName; | ||
| 118 | - const date = req.body.date; | ||
| 119 | - const LocateQuery = "?PLAY_YMD={}".replace("{}", date); | ||
| 120 | - | ||
| 121 | - //입력된 영화관에 맞는 지역 코드와 영화관 고유코드 찾기 | ||
| 122 | - let regionCode = 0, theaterCode = ""; | ||
| 123 | - for(let i = 0; i < 9; i++){ | ||
| 124 | - for(const elem of cgv_theaters[i]){ | ||
| 125 | - if(elem.theater_name == theaterName){ | ||
| 126 | - regionCode = i; | ||
| 127 | - theaterCode = elem.theater_code; | ||
| 128 | - break; | ||
| 129 | - } | ||
| 130 | - } | ||
| 131 | - } | ||
| 132 | - | ||
| 133 | - //예매 가능한 영화 리스트를 얻기 위해 빠른 예매 사이트로 이동. | ||
| 134 | - const driver_ticketing = new webdriver.Builder().forBrowser('chrome').setChromeOptions(new chrome.Options()).build(); | ||
| 135 | - driver_ticketing.get(url_ticketing + LocateQuery); | ||
| 136 | - driver_ticketing.switchTo().frame("ticket_iframe"); //Frame 전환 | ||
| 137 | - | ||
| 138 | - //지역 코드에 맞게 list element click | ||
| 139 | - const selected_areas_list = await driver_ticketing.wait(until.elementsLocated(By.css("#theater_area_list > ul > li > a > span.name"))); | ||
| 140 | - await selected_areas_list[regionCode].click(); | ||
| 141 | - driver_ticketing.sleep(1000); | ||
| 142 | - | ||
| 143 | - //선택한 지역에 대응되는 영화관 정보 가져오기 | ||
| 144 | - const selected_theaters_list = await driver_ticketing.wait(until.elementsLocated(By.css("#theater_area_list > ul > li.selected > div > ul > li"))); | ||
| 145 | - | ||
| 146 | - //프로그램 내부에서 가지고 있는 영화관코드와 웹에서 받아온 영화관코드가 일치하는 경우, selected_theaters_list element 클릭 | ||
| 147 | - for (const theater_element of selected_theaters_list){ | ||
| 148 | - if(await theater_element.getAttribute("theater_cd") == theaterCode){ | ||
| 149 | - await theater_element.click(); | ||
| 150 | - driver_ticketing.sleep(1000); | ||
| 151 | - break; | ||
| 152 | - } | ||
| 153 | - } | ||
| 154 | - | ||
| 155 | - //선택한 영화관에서, 선택한 일자에 상영하는 영화 목록 들고오기 | ||
| 156 | - const selected_movies_list = await driver_ticketing.wait(until.elementsLocated(By.css("#movie_list > ul > li > a > span.text"))); | ||
| 157 | - const codes_of_selected_movies = await driver_ticketing.wait(until.elementsLocated(By.css("#movie_list > ul > li"))); | ||
| 158 | - | ||
| 159 | - //선택불가를 제외한 영화 제목 및 영화 코드 가져오기. | ||
| 160 | - for(let i = 0; i < selected_movies_list.length; i++){ | ||
| 161 | - const movie_enabled = await codes_of_selected_movies[i].getAttribute("class") | ||
| 162 | - if(movie_enabled.endsWith("dimmed")) | ||
| 163 | - break; | ||
| 164 | - const accessible_movie = { | ||
| 165 | - "movie_title": await selected_movies_list[i].getText(), | ||
| 166 | - "movie_code" : await codes_of_selected_movies[i].getAttribute("movie_cd_group") | ||
| 167 | - } | ||
| 168 | - cgv_accessible_movies.push(accessible_movie); | ||
| 169 | - } | ||
| 170 | - driver_ticketing.close(); | ||
| 171 | - | ||
| 172 | - res.send(cgv_accessible_movies); | ||
| 173 | -}); | ||
| 174 | - | ||
| 175 | -app.listen(23023); | ||
| ... | \ No newline at end of file | ... | \ No newline at end of file |
WebCrawling/src/package.json
deleted
100644 → 0
| 1 | -{ | ||
| 2 | - "name": "test01", | ||
| 3 | - "version": "1.0.0", | ||
| 4 | - "description": "", | ||
| 5 | - "main": "index.js", | ||
| 6 | - "scripts": { | ||
| 7 | - "test": "echo \"Error: no test specified\" && exit 1" | ||
| 8 | - }, | ||
| 9 | - "author": "", | ||
| 10 | - "license": "ISC", | ||
| 11 | - "dependencies": { | ||
| 12 | - "async": "^3.2.3", | ||
| 13 | - "body-parser": "^1.20.0", | ||
| 14 | - "cheerio": "^1.0.0-rc.11", | ||
| 15 | - "express": "^4.18.1", | ||
| 16 | - "puppeteer": "^14.1.1", | ||
| 17 | - "selenium-webdriver": "^4.1.2" | ||
| 18 | - } | ||
| 19 | -} |
-
Please register or login to post a comment