Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'
Feature/megabox crawling megabox crawling first merge See merge request !11
Showing
1 changed file
with
78 additions
and
0 deletions
Megabox_crawling/megaboxCrawling.py
0 → 100644
| 1 | +from urllib import response | ||
| 2 | +import requests | ||
| 3 | +from bs4 import BeautifulSoup | ||
| 4 | +from selenium import webdriver | ||
| 5 | +from webdriver_manager.chrome import ChromeDriverManager | ||
| 6 | +from selenium.webdriver.common.keys import Keys | ||
| 7 | + | ||
# Headless Chrome configuration shared by both browser sessions.
options = webdriver.ChromeOptions()
options.add_argument("headless")  # do not open a browser window
options.add_experimental_option("excludeSwitches", ["enable-logging"])

url = "https://megabox.co.kr/booking"
rate_url = "https://www.megabox.co.kr/movie"

# Two sessions are used: `driver` loads the booking page and `driver2` loads
# the movie listing page. Each one is quit in a `finally` block so that no
# Chrome process is left running, even when a page load or frame switch fails.
driver = webdriver.Chrome(options=options)
try:
    driver2 = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver2.maximize_window()

        driver.implicitly_wait(2)
        driver.get(url)

        driver2.implicitly_wait(2)
        driver2.get(rate_url)

        # Movie listing page: collect the '.rate' elements and the title
        # elements ('.tit-area > p.tit') from the rendered HTML.
        r2 = driver2.page_source
        soup = BeautifulSoup(r2, "html.parser")
        ticketing_rate = soup.select('.rate')
        movie_name = soup.select('.tit-area > p.tit')
    finally:
        driver2.quit()

    # NOTE: the former `driver.find_elements_by_css_selector('iframe')` call
    # was dropped: its result was never used, and that helper no longer exists
    # in Selenium 4.3+, where it raises AttributeError.
    # The booking UI is read from inside the 'frameBokdMBooking' frame, so
    # switch into it before taking the page source.
    driver.switch_to.frame('frameBokdMBooking')
    r = driver.page_source
    soup = BeautifulSoup(r, "html.parser")
finally:
    driver.quit()
| 35 | + | ||
# Branch (theater) buttons, one list per scroll container of the booking
# frame. The variable names record which region each container is taken to
# hold; that index-to-region mapping is not verified here.
seoul = soup.select("#mCSB_4_container>ul>li>button")
Gyeonggi = soup.select("#mCSB_5_container>ul>li>button")
Incheon = soup.select("#mCSB_6_container>ul>li>button")
DCS = soup.select("#mCSB_7_container>ul>li>button")  # Daejeon / Chungcheong / Sejong
BDG = soup.select("#mCSB_8_container>ul>li>button")  # Busan / Daegu / Gyeongsang
GJ = soup.select("#mCSB_9_container>ul>li>button")  # Gwangju / Jeolla
Gangwon = soup.select("#mCSB_10_container>ul>li>button")

# Map each branch name ('brch-nm' attribute) to its branch number
# ('brch-no' attribute), visiting the regions in the order listed.
dict1 = {}
for region_buttons in (seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon):
    for button in region_buttons:
        dict1[button['brch-nm']] = button['brch-no']
| 58 | + | ||
# Movie buttons from the booking frame; each carries 'movie-nm', 'movie-no'
# and 'form-at' attributes.
attr1 = soup.select("#mCSB_1_container>ul>li>button")

# movie name -> [movie number, form-at]; a rate string and, for the first ten
# entries, a {'rank': n} dict are appended below.
movie_dict = {
    movie['movie-nm']: [movie['movie-no'], movie['form-at']]
    for movie in attr1
}

# Attach the rate text scraped from the movie listing page.
for rate_tag, title_tag in zip(ticketing_rate, movie_name):
    entry = movie_dict.get(title_tag.get('title'))
    # Skip titles that are not in the booking list (or tags with no 'title'
    # attribute) instead of raising KeyError, and keep only the first rate
    # for a repeated title so every entry keeps the same positional layout.
    if entry is None or len(entry) != 2:
        continue
    entry.append(rate_tag.string)

# Fill in a default rate where none was found, and tag the first ten entries
# (in booking-list order) with their rank.
for rank, value in enumerate(movie_dict.values(), start=1):
    if len(value) == 2:
        # The original literal was misspelled ("예메율"); corrected to "예매율".
        # NOTE(review): presumably this should mirror the label used in the
        # scraped rate strings - confirm against the live page.
        value.append("예매율 0.0%")
    if rank <= 10:
        value.append({'rank': rank})
print(movie_dict)

# form-at: on the initial, unfiltered fetch it appears to indicate whether the
#          movie is showing on the selected date (regardless of location).
# TODO: after querying with a brch-no, form-at needs to be checked again.
-
Please register or login to post a comment