Merge branch 'feature/Megabox_Crawling' into 'feature/Megabox_Crawling'
코드 정리 See merge request !12
Showing 1 changed file with 45 additions and 48 deletions
| 1 | -from urllib import response | ||
| 2 | -import requests | ||
| 3 | from bs4 import BeautifulSoup | 1 | from bs4 import BeautifulSoup |
| 4 | from selenium import webdriver | 2 | from selenium import webdriver |
| 5 | -from webdriver_manager.chrome import ChromeDriverManager | 3 | +import chromedriver_autoinstaller |
| 6 | -from selenium.webdriver.common.keys import Keys | 4 | + |
| 5 | +chromedriver_autoinstaller.install() | ||
| 6 | + | ||
| 7 | +booking_url = "https://megabox.co.kr/booking" | ||
| 8 | +rate_url = "https://www.megabox.co.kr/movie" | ||
| 7 | 9 | ||
| 8 | options = webdriver.ChromeOptions() | 10 | options = webdriver.ChromeOptions() |
| 9 | options.add_argument("headless") #창 안 띄움 | 11 | options.add_argument("headless") #창 안 띄움 |
| 10 | options.add_experimental_option("excludeSwitches", ["enable-logging"]) | 12 | options.add_experimental_option("excludeSwitches", ["enable-logging"]) |
| 11 | -driver = webdriver.Chrome(options = options) | ||
| 12 | -driver2=webdriver.Chrome(options = options) | ||
| 13 | 13 | ||
| 14 | -url = "https://megabox.co.kr/booking" | 14 | +driver = webdriver.Chrome(options = options) |
| 15 | -rate_url = "https://www.megabox.co.kr/movie" | ||
| 16 | driver.maximize_window() | 15 | driver.maximize_window() |
| 17 | -driver2.maximize_window() | 16 | +# driver.implicitly_wait(2) |
| 18 | - | 17 | +driver.get(booking_url) |
| 19 | -driver.implicitly_wait(2) | ||
| 20 | -driver.get(url) | ||
| 21 | 18 | ||
| 22 | -driver2.implicitly_wait(2) | 19 | +driver2=webdriver.Chrome(options = options) |
| 20 | +driver2.maximize_window() | ||
| 21 | +# driver2.implicitly_wait(2) | ||
| 23 | driver2.get(rate_url) | 22 | driver2.get(rate_url) |
| 24 | 23 | ||
| 25 | -r2 = driver2.page_source | ||
| 26 | -soup = BeautifulSoup(r2, "html.parser") | ||
| 27 | -ticketing_rate = soup.select('.rate') | ||
| 28 | -movie_name = soup.select('.tit-area > p.tit') | ||
| 29 | 24 | ||
| 30 | -iframes = driver.find_elements_by_css_selector('iframe') | 25 | +theater_location = dict() |
| 31 | 26 | ||
| 27 | +# iframes = driver.find_elements_by_css_selector('iframe') | ||
| 32 | driver.switch_to.frame('frameBokdMBooking') | 28 | driver.switch_to.frame('frameBokdMBooking') |
| 33 | -r = driver.page_source | 29 | +page1 = driver.page_source |
| 34 | -soup = BeautifulSoup(r, "html.parser") | 30 | +soup1 = BeautifulSoup(page1, "html.parser") |
| 35 | - | 31 | + |
| 36 | -seoul = soup.select("#mCSB_4_container>ul>li>button") | 32 | +seoul = soup1.select("#mCSB_4_container>ul>li>button") |
| 37 | -Gyeonggi = soup.select("#mCSB_5_container>ul>li>button") | 33 | +Gyeonggi = soup1.select("#mCSB_5_container>ul>li>button") |
| 38 | -Incheon = soup.select("#mCSB_6_container>ul>li>button") | 34 | +Incheon = soup1.select("#mCSB_6_container>ul>li>button") |
| 39 | -DCS = soup.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong | 35 | +DCS = soup1.select("#mCSB_7_container>ul>li>button")#Daejeon Chungcheong Sejong |
| 40 | -BDG = soup.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang | 36 | +BDG = soup1.select("#mCSB_8_container>ul>li>button")#Busan Daegu Gyeongsang |
| 41 | -GJ= soup.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla | 37 | +GJ= soup1.select("#mCSB_9_container>ul>li>button")#gwangju_jeonla |
| 42 | -Gangwon = soup.select("#mCSB_10_container>ul>li>button") | 38 | +Gangwon = soup1.select("#mCSB_10_container>ul>li>button") |
| 43 | -dict1 = dict() | 39 | + |
| 44 | -for brch in seoul: | 40 | +loc = [seoul, Gyeonggi, Incheon, DCS, BDG, GJ, Gangwon] |
| 45 | - dict1[brch['brch-nm']] = brch['brch-no'] | 41 | + |
| 46 | -for brch in Gyeonggi: | 42 | +def get_location_code(location): |
| 47 | - dict1[brch['brch-nm']] = brch['brch-no'] | 43 | + for brch in location: |
| 48 | -for brch in Incheon: | 44 | + theater_location[brch['brch-nm']] = brch['brch-no'] |
| 49 | - dict1[brch['brch-nm']] = brch['brch-no'] | 45 | + |
| 50 | -for brch in DCS: | 46 | + |
| 51 | - dict1[brch['brch-nm']] = brch['brch-no'] | 47 | +for parameter in loc: |
| 52 | -for brch in BDG: | 48 | + get_location_code(parameter) |
| 53 | - dict1[brch['brch-nm']] = brch['brch-no'] | 49 | + |
| 54 | -for brch in GJ: | 50 | + |
| 55 | - dict1[brch['brch-nm']] = brch['brch-no'] | 51 | +page2 = driver2.page_source |
| 56 | -for brch in Gangwon: | 52 | +soup2 = BeautifulSoup(page2, "html.parser") |
| 57 | - dict1[brch['brch-nm']] = brch['brch-no'] | 53 | +ticketing_rate = soup2.select('.rate') |
| 58 | - | 54 | +movie_name = soup2.select('.tit-area > p.tit') |
| 59 | -attr1 = soup.select("#mCSB_1_container>ul>li>button") | 55 | +get_movie_info = soup1.select("#mCSB_1_container>ul>li>button") |
| 60 | 56 | ||
| 61 | movie_dict = dict() | 57 | movie_dict = dict() |
| 62 | -for movie in attr1: | 58 | +rank = 1 |
| 59 | + | ||
| 60 | +for movie in get_movie_info: | ||
| 63 | movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']] | 61 | movie_dict[movie['movie-nm']] = [movie['movie-no'], movie['form-at']] |
| 64 | 62 | ||
| 65 | for r, m in zip(ticketing_rate, movie_name): | 63 | for r, m in zip(ticketing_rate, movie_name): |
| 66 | movie_dict[m['title']].append(r.string) | 64 | movie_dict[m['title']].append(r.string) |
| 67 | 65 | ||
| 68 | -rank = 1 | 66 | + |
| 69 | for value in movie_dict.values(): | 67 | for value in movie_dict.values(): |
| 70 | if(len(value) == 2): | 68 | if(len(value) == 2): |
| 71 | value.append("예메율 0.0%") | 69 | value.append("예메율 0.0%") |
| 72 | if(rank<=10): | 70 | if(rank<=10): |
| 73 | value.append({'rank' : rank}) | 71 | value.append({'rank' : rank}) |
| 74 | rank += 1 | 72 | rank += 1 |
| 75 | -print(movie_dict) | ||
| 76 | 73 | ||
| 77 | #form-at: 처음 그냥 받아올 때 해당 날짜에 영화 있는지 확인(장소 상관 없이) | 74 | #form-at: 처음 그냥 받아올 때 해당 날짜에 영화 있는지 확인(장소 상관 없이) |
| 78 | #brch-no로 쿼리 주고 나서 form-at확인 필요 | 75 | #brch-no로 쿼리 주고 나서 form-at확인 필요 |
| ... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment