임승현

Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'

Feature/megabox crawling

megabox crawling first merge

See merge request !11
# Megabox crawler script.
#
# Loads the Megabox booking page and movie page in two headless Chrome
# sessions, then builds:
#   * dict1      - {branch name: branch number} for every listed region
#   * movie_dict - {movie name: [movie number, form-at, rate text, {'rank': n}]}
#                  (the rank entry is only added for the first ten movies)
# and prints movie_dict.
from urllib import response

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

# Fallback rate text for movies that have no entry on the rate page.
# Spelling corrected from "예메율"; presumably this should match the label used
# in the scraped `.rate` text - confirm against the live page.
DEFAULT_RATE = "예매율 0.0%"

options = webdriver.ChromeOptions()
options.add_argument("headless")  # do not open a browser window
options.add_experimental_option("excludeSwitches", ["enable-logging"])

url = "https://megabox.co.kr/booking"
rate_url = "https://www.megabox.co.kr/movie"

# Both browsers are closed in `finally` blocks so that no headless Chrome
# process is left running, even if a page load or frame switch fails.
driver = webdriver.Chrome(options=options)
try:
    driver2 = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver2.maximize_window()

        driver.implicitly_wait(2)
        driver.get(url)

        driver2.implicitly_wait(2)
        driver2.get(rate_url)

        # Snapshot of the movie page (source of rates and titles).
        r2 = driver2.page_source

        # `find_elements_by_css_selector` was removed in Selenium 4.3;
        # `find_elements(By.CSS_SELECTOR, ...)` is the supported spelling.
        # The result is not used below; the name is retained so the
        # module-level variable still exists.
        iframes = driver.find_elements(By.CSS_SELECTOR, "iframe")

        # The booking UI lives inside this iframe, so its markup is only
        # present in page_source after switching into the frame.
        driver.switch_to.frame("frameBokdMBooking")
        r = driver.page_source
    finally:
        driver2.quit()
finally:
    driver.quit()

# --- Movie page: rate and title tags ----------------------------------------
rate_soup = BeautifulSoup(r2, "html.parser")
ticketing_rate = rate_soup.select(".rate")
movie_name = rate_soup.select(".tit-area > p.tit")

# --- Booking page: branches per region --------------------------------------
soup = BeautifulSoup(r, "html.parser")

seoul = soup.select("#mCSB_4_container>ul>li>button")
Gyeonggi = soup.select("#mCSB_5_container>ul>li>button")
Incheon = soup.select("#mCSB_6_container>ul>li>button")
DCS = soup.select("#mCSB_7_container>ul>li>button")  # Daejeon Chungcheong Sejong
BDG = soup.select("#mCSB_8_container>ul>li>button")  # Busan Daegu Gyeongsang
GJ = soup.select("#mCSB_9_container>ul>li>button")  # Gwangju Jeolla
Gangwon = soup.select("#mCSB_10_container>ul>li>button")

# Branch name -> branch number, across all regions (later regions overwrite
# earlier ones if a branch name repeats).
dict1 = {}
for region in (seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon):
    for brch in region:
        dict1[brch["brch-nm"]] = brch["brch-no"]

# --- Booking page: movies ---------------------------------------------------
attr1 = soup.select("#mCSB_1_container>ul>li>button")

# Movie name -> [movie number, form-at]
movie_dict = {
    movie["movie-nm"]: [movie["movie-no"], movie["form-at"]]
    for movie in attr1
}

# Attach the rate text to each movie. The rate and title tags are paired by
# position (assumes both selectors return the movies in the same order - TODO
# confirm). Titles that are missing from the booking list, or tags without a
# `title` attribute, are skipped instead of raising KeyError.
for rate_tag, title_tag in zip(ticketing_rate, movie_name):
    entry = movie_dict.get(title_tag.get("title"))
    if entry is not None:
        entry.append(rate_tag.string)

# Rank follows the order of the booking page's movie list; only the first ten
# movies receive a rank entry.
for rank, value in enumerate(movie_dict.values(), start=1):
    if len(value) == 2:
        # No rate was found on the movie page for this title.
        value.append(DEFAULT_RATE)
    if rank <= 10:
        value.append({"rank": rank})
print(movie_dict)

# NOTE: form-at - on the initial fetch, indicates whether the movie is showing
#       on the given date (regardless of location).
# TODO: form-at needs to be re-checked after querying with a brch-no. Any such
#       follow-up query must run before the drivers are quit above.