Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'
Feature/megabox crawling megabox crawling first merge See merge request !11
Showing
1 changed file
with
78 additions
and
0 deletions
Megabox_crawling/megaboxCrawling.py
0 → 100644
1 | +from urllib import response | ||
2 | +import requests | ||
3 | +from bs4 import BeautifulSoup | ||
4 | +from selenium import webdriver | ||
5 | +from webdriver_manager.chrome import ChromeDriverManager | ||
6 | +from selenium.webdriver.common.keys import Keys | ||
7 | + | ||
# Crawls Megabox: collects theater branches and bookable movies from the
# booking page, joins each movie with the ticketing rate shown on the movie
# page, and prints the resulting dictionary.

url = "https://megabox.co.kr/booking"
rate_url = "https://www.megabox.co.kr/movie"

# Name/id of the iframe that hosts the booking widget.
BOOKING_FRAME = "frameBokdMBooking"
# Selector template for the per-region theater button lists. Containers 4-10
# are, in order: Seoul, Gyeonggi, Incheon, Daejeon/Chungcheong/Sejong,
# Busan/Daegu/Gyeongsang, Gwangju/Jeolla, Gangwon.
REGION_SELECTOR = "#mCSB_{}_container>ul>li>button"
REGION_CONTAINER_IDS = range(4, 11)
MOVIE_SELECTOR = "#mCSB_1_container>ul>li>button"
# Placeholder appended for movies that got no rate from the movie page.
# NOTE(review): "예메율" looks like a typo for "예매율" (ticketing rate); kept
# byte-for-byte because downstream consumers may rely on the exact text.
DEFAULT_RATE = "예메율 0.0%"
# Only the first this-many movies receive a {'rank': n} entry.
TOP_RANK_LIMIT = 10


def new_driver():
    """Create a headless Chrome driver configured like the rest of the script.

    Returns:
        A started ``webdriver.Chrome`` with a maximized window and a
        2-second implicit wait.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("headless")  # do not open a browser window
    options.add_experimental_option("excludeSwitches", ["enable-logging"])
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.implicitly_wait(2)
    except Exception:
        # Do not leave a browser process behind if setup fails half-way.
        driver.quit()
        raise
    return driver


def fetch_page_sources():
    """Load the booking and movie pages and return their HTML.

    Both browsers are always shut down, even when loading fails; the
    original script never called ``quit()`` and leaked two Chrome processes
    per run.

    Returns:
        Tuple ``(booking_html, rate_html)``. ``booking_html`` is the source
        of the booking iframe, ``rate_html`` the source of the movie page.
    """
    drivers = []
    try:
        booking_driver = new_driver()
        drivers.append(booking_driver)
        rate_driver = new_driver()
        drivers.append(rate_driver)

        booking_driver.get(url)
        rate_driver.get(rate_url)
        rate_html = rate_driver.page_source

        # The former `find_elements_by_css_selector('iframe')` call was
        # dropped: its result was unused and the method no longer exists in
        # Selenium 4.3+. switch_to.frame() locates the frame by id/name
        # itself, subject to the implicit wait configured above.
        booking_driver.switch_to.frame(BOOKING_FRAME)
        booking_html = booking_driver.page_source
        return booking_html, rate_html
    finally:
        for opened in drivers:
            opened.quit()


def collect_branches(buttons):
    """Map branch name (``brch-nm``) to branch number (``brch-no``).

    Args:
        buttons: Iterable of objects supporting ``button[attr]`` lookup
            (BeautifulSoup tags in production).
    """
    return {button['brch-nm']: button['brch-no'] for button in buttons}


def collect_movies(buttons):
    """Map movie name to ``[movie-no, form-at]`` for every movie button.

    Args:
        buttons: Iterable of objects supporting ``button[attr]`` lookup.
    """
    return {
        button['movie-nm']: [button['movie-no'], button['form-at']]
        for button in buttons
    }


def attach_rates(movie_dict, titles, rates):
    """Append each rate to the entry of the movie with the matching title.

    Titles that are not present in ``movie_dict`` are skipped; the original
    code raised ``KeyError`` for any movie listed on the movie page but
    absent from the booking list.

    Args:
        movie_dict: Mapping of movie name to its value list; mutated in place.
        titles: Movie titles, paired positionally with ``rates``.
            # assumes both sequences come from the page in the same order
            # -- TODO confirm against the site markup
        rates: Rate strings for the corresponding titles.
    """
    for title, rate in zip(titles, rates):
        entry = movie_dict.get(title)
        if entry is not None:
            entry.append(rate)


def apply_defaults_and_ranks(movie_dict):
    """Fill in missing rates and tag the first ``TOP_RANK_LIMIT`` movies.

    Entries that still hold only ``[movie-no, form-at]`` get ``DEFAULT_RATE``
    appended. The rank is the 1-based position in ``movie_dict`` iteration
    order, and only ranks up to ``TOP_RANK_LIMIT`` are recorded.

    Args:
        movie_dict: Mapping of movie name to its value list; mutated in place.
    """
    for rank, value in enumerate(movie_dict.values(), start=1):
        if len(value) == 2:
            value.append(DEFAULT_RATE)
        if rank <= TOP_RANK_LIMIT:
            value.append({'rank': rank})


def main():
    """Crawl Megabox, print the movie dictionary, and return the results.

    Returns:
        Tuple ``(dict1, movie_dict)``: branch name -> branch number, and
        movie name -> ``[movie-no, form-at, rate, optional {'rank': n}]``.
    """
    booking_html, rate_html = fetch_page_sources()

    rate_soup = BeautifulSoup(rate_html, "html.parser")
    rates = [tag.string for tag in rate_soup.select('.rate')]
    titles = [tag.get('title') for tag in rate_soup.select('.tit-area > p.tit')]

    booking_soup = BeautifulSoup(booking_html, "html.parser")

    # Branch lookup across all regions, in the original region order.
    dict1 = {}
    for container_id in REGION_CONTAINER_IDS:
        region_buttons = booking_soup.select(REGION_SELECTOR.format(container_id))
        dict1.update(collect_branches(region_buttons))

    movie_dict = collect_movies(booking_soup.select(MOVIE_SELECTOR))
    attach_rates(movie_dict, titles, rates)
    apply_defaults_and_ranks(movie_dict)
    print(movie_dict)
    return dict1, movie_dict


if __name__ == "__main__":
    main()

# form-at: on the initial fetch, tells whether the movie is showing on the
#   given date (regardless of location).
# form-at needs to be checked again after querying with a brch-no.
-
Please register or login to post a comment