Showing
1 changed file
with
123 additions
and
0 deletions
import os
from time import sleep

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import ElementNotInteractableException
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
9 | + | ||
10 | +# Set the terminal to UTF-8 encoding: chcp 65001 | ||
11 | + | ||
##########################################################################
##################### Selenium driver configuration ######################
##########################################################################
options = webdriver.ChromeOptions()
options.add_argument('headless')
options.add_argument('lang=ko_KR')
chromedriver_path = "chromedriver"
# Open chromedriver from the current working directory. The executable path
# is handed over through a Service object: passing it as the first positional
# argument of webdriver.Chrome is deprecated in Selenium 4 and no longer
# accepted by later 4.x releases.
driver = webdriver.Chrome(
    service=Service(os.path.join(os.getcwd(), chromedriver_path)),
    options=options,
)
20 | + | ||
21 | + | ||
def main():
    """Open Kakao Map, crawl one hard-coded search query, then shut down.

    Works on the module-level ``driver``. The browser is quit in a
    ``finally`` clause so that it is released even when the crawl raises;
    "finish" is printed only after a successful run.
    """
    try:
        driver.implicitly_wait(4)  # wait up to 4 seconds for elements to render
        driver.get('https://map.kakao.com/')  # load the map page

        search("영통역맛집")
    finally:
        driver.quit()
    print("finish")
32 | + | ||
33 | + | ||
def _parse_place_items(page_source):
    """Return the ``.placelist > .PlaceItem`` entries found in *page_source*."""
    soup = BeautifulSoup(page_source, 'html.parser')
    return soup.select('.placelist > .PlaceItem')


def search(place):
    """Search *place* on the loaded map page and crawl result pages 1 to 5.

    Uses the module-level ``driver``. The first result page is crawled
    directly; the "more" link is then activated and pages 2-5 are visited
    one by one. When a link is missing or cannot be interacted with,
    'not found' is printed and paging stops. The search box is cleared
    before returning from the paging step in every case.

    Args:
        place: Text typed into the search box.
    """
    search_area = driver.find_element(By.XPATH, '//*[@id="search.keyword.query"]')  # search box
    search_area.send_keys(place)  # type the query
    driver.find_element(By.XPATH, '//*[@id="search.keyword.submit"]').send_keys(Keys.ENTER)  # submit with Enter
    sleep(1)

    # Crawl the place list of the first result page.
    crawling(_parse_place_items(driver.page_source))
    search_area.clear()

    try:
        # "More" has to be activated first to reach page 2.
        driver.find_element(By.XPATH, '//*[@id="info.search.place.more"]').send_keys(Keys.ENTER)
        sleep(1)

        # Pages 2 to 5.
        for page_no in range(2, 6):
            page_xpath = '//*[@id="info.search.page.no{}"]'.format(page_no)
            driver.find_element(By.XPATH, page_xpath).send_keys(Keys.ENTER)  # go to the page
            sleep(1)

            crawling(_parse_place_items(driver.page_source))

    except (ElementNotInteractableException, NoSuchElementException):
        # A paging link that is absent from the DOM raises
        # NoSuchElementException rather than ElementNotInteractableException;
        # both mean there is nothing further to visit.
        print('not found')
    finally:
        search_area.clear()
75 | + | ||
def crawling(placeLists):
    """Print the menu information of every entry in *placeLists*, in order.

    Only the position of each entry is used: it is passed, together with
    the module-level ``driver``, to ``getMenuInfo`` and the returned list
    is printed.

    Args:
        placeLists: Iterable of place entries from a result page.
    """
    for position, _ in enumerate(placeLists):
        print(getMenuInfo(position, driver))
80 | + | ||
def getMenuInfo(i, driver):
    """Open the detail page of the i-th listed place and collect its menus.

    The detail link is activated, the most recently opened window is
    selected and its page source is parsed. Menu entries are looked up
    under three list types (``menuonly_type``, ``nophoto_type``,
    ``photo_type``); the first type that yields entries is used.
    Afterwards the detail tab is closed and focus goes back to the window
    that was active on entry, even when parsing raises.

    Args:
        i: Zero-based position of the place in the result list.
        driver: Selenium WebDriver currently showing the result list.

    Returns:
        A list of ``[name, price]`` lists, empty when no menu is found.
    """
    # Go to the detail page to look for the menu.
    detail_page_xpath = '//*[@id="info.search.place.list"]/li[' + str(i + 1) + ']/div[5]/div[4]/a[1]'
    search_window = driver.current_window_handle
    driver.find_element(By.XPATH, detail_page_xpath).send_keys(Keys.ENTER)
    driver.switch_to.window(driver.window_handles[-1])  # switch to the detail tab

    try:
        sleep(1)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Three menu list types; take the first one that has entries.
        menus = (
            soup.select('.cont_menu > .list_menu > .menuonly_type')
            or soup.select('.cont_menu > .list_menu > .nophoto_type')
            or soup.select('.cont_menu > .list_menu > .photo_type')
        )
        return [_getMenuInfo(menu) for menu in menus]
    finally:
        # Never close the search tab itself if no separate tab was opened.
        if driver.current_window_handle != search_window:
            driver.close()
        driver.switch_to.window(search_window)  # back to the search tab
111 | + | ||
def _getMenuInfo(menu):
    """Extract ``[name, price]`` from one parsed menu entry.

    The name is the text of the first ``.info_menu > .loss_word`` element.
    The price is the second space-separated token of the first
    ``.info_menu > .price_menu`` element's text; when that text contains
    no space, the whole stripped text is used instead of raising
    ``IndexError``.

    Args:
        menu: Parsed element exposing ``select(css_selector)``.

    Returns:
        ``[menuName, menuPrice]``; either item is ``''`` when the
        corresponding element is absent.
    """
    menuNames = menu.select('.info_menu > .loss_word')
    menuPrices = menu.select('.info_menu > .price_menu')

    menuName = menuNames[0].text if menuNames else ''
    menuPrice = ''

    if menuPrices:
        priceText = menuPrices[0].text
        tokens = priceText.split(' ')
        # Fall back to the full text when there is no second token.
        menuPrice = tokens[1] if len(tokens) > 1 else priceText.strip()

    return [menuName, menuPrice]
121 | + | ||
# Start the crawl only when this file is executed as a script.
if __name__ == '__main__':
    main()
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment