김윤지

menu_crawling

1 +import os
2 +from time import sleep
3 +
4 +from selenium import webdriver
5 +from selenium.webdriver.common.keys import Keys
6 +from selenium.common.exceptions import ElementNotInteractableException
7 +from bs4 import BeautifulSoup
8 +from selenium.webdriver.common.by import By
9 +
# terminal UTF-8 encoding on Windows: chcp 65001

##########################################################################
##################### variables related to Selenium ######################
##########################################################################
# Headless Chrome with a Korean locale. The driver is created once at
# module import time and shared by every function below via the module
# global `driver`.
options = webdriver.ChromeOptions()
options.add_argument('headless')  # NOTE(review): canonical Chrome switch is '--headless' — confirm this form takes effect
options.add_argument('lang=ko_KR')  # NOTE(review): canonical form is '--lang=ko_KR' — confirm
chromedriver_path = "chromedriver"
# NOTE(review): passing the executable path positionally was removed in
# Selenium 4 (use Service(executable_path=...)); this call assumes Selenium 3.x.
driver = webdriver.Chrome(os.path.join(os.getcwd(), chromedriver_path), options=options)  # open chromedriver
21 +
def main():
    """Entry point: search Kakao Map for restaurants near Yeongtong
    Station, crawl each result's menu, then shut the driver down.
    """
    # `global driver, menu_wb` was dropped: nothing is assigned here, and
    # `menu_wb` is never defined anywhere in this file (dead declaration).
    driver.implicitly_wait(4)  # wait up to 4 s for elements to render
    driver.get('https://map.kakao.com/')  # open Kakao Map

    search("영통역맛집")  # keyword: "restaurants near Yeongtong Station"

    driver.quit()
    print("finish")
32 +
33 +
def search(place):
    """Search Kakao Map for *place* and crawl menus from up to the first
    five result pages.

    place -- search keyword typed into the Kakao Map search box.
    """
    # `global driver` removed: the module-level driver is only read here.
    search_area = driver.find_element(By.XPATH, '//*[@id="search.keyword.query"]')  # search box
    search_area.send_keys(place)  # type the keyword
    driver.find_element(By.XPATH, '//*[@id="search.keyword.submit"]').send_keys(Keys.ENTER)  # submit with Enter
    sleep(1)  # crude wait for the result list to render

    # Page 1 of the result list (crawl only what actually rendered).
    _crawl_current_page()
    search_area.clear()

    # Click "more" to expose pages 2-5; when there are too few results the
    # button is not interactable, so just report and fall through.
    try:
        driver.find_element(By.XPATH, '//*[@id="info.search.place.more"]').send_keys(Keys.ENTER)
        sleep(1)

        # Pages 2 through 5.
        for page_no in range(2, 6):
            page_xpath = '//*[@id="info.search.page.no' + str(page_no) + '"]'
            driver.find_element(By.XPATH, page_xpath).send_keys(Keys.ENTER)
            sleep(1)
            _crawl_current_page()
    except ElementNotInteractableException:
        print('not found')
    finally:
        search_area.clear()


def _crawl_current_page():
    """Parse the currently rendered result page and crawl its place list.

    Factored out of search(): the parse-then-crawl sequence was duplicated
    for page 1 and for pages 2-5.
    """
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    crawling(soup.select('.placelist > .PlaceItem'))
75 +
def crawling(placeLists):
    """Fetch and print the menu list for every place on the current page.

    placeLists -- the bs4 elements selected from '.placelist > .PlaceItem';
    only their count/positions matter, the detail page is opened by index.
    """
    for idx, _place in enumerate(placeLists):
        print(getMenuInfo(idx, driver))
80 +
def getMenuInfo(i, driver):
    """Open the detail page of the i-th search result in a new tab, scrape
    its menu entries, close the tab, and return the entries.

    i      -- zero-based index of the place in the current result list.
    driver -- the shared Selenium webdriver.
    Returns a list of [name, price] pairs (see _getMenuInfo).
    """
    # The i-th place's "detail" link opens the place page in a new tab.
    detail_page_xpath = '//*[@id="info.search.place.list"]/li[' + str(i + 1) + ']/div[5]/div[4]/a[1]'
    driver.find_element(By.XPATH, detail_page_xpath).send_keys(Keys.ENTER)
    driver.switch_to.window(driver.window_handles[-1])  # focus the detail tab
    sleep(1)  # crude wait for the detail page to render

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # The menu list is rendered in exactly one of three layouts. The
    # original if/elif/else repeated the same loop three times; picking the
    # first non-empty selection preserves that exact priority order
    # (photo_type is used last, even when it is also empty).
    menus = (soup.select('.cont_menu > .list_menu > .menuonly_type')
             or soup.select('.cont_menu > .list_menu > .nophoto_type')
             or soup.select('.cont_menu > .list_menu > .photo_type'))
    menuInfos = [_getMenuInfo(menu) for menu in menus]

    driver.close()  # close the detail tab
    driver.switch_to.window(driver.window_handles[0])  # back to the search tab

    return menuInfos
111 +
def _getMenuInfo(menu):
    """Extract [name, price] from one menu item element.

    menu -- a bs4 Tag for a single menu entry.
    Returns [menuName, menuPrice]; menuPrice is '' when no price element is
    present or when the price text has no token after the label.
    """
    menuName = menu.select('.info_menu > .loss_word')[0].text
    menuPrices = menu.select('.info_menu > .price_menu')
    menuPrice = ''

    if menuPrices:
        # Price text is expected to look like "<label> <amount>"; take the
        # token after the label. Guard the lookup: the original
        # split(' ')[1] raised IndexError when the text had no space.
        parts = menuPrices[0].text.split(' ')
        if len(parts) > 1:
            menuPrice = parts[1]

    return [menuName, menuPrice]
121 +
122 +if __name__ == "__main__":
123 + main()
...\ No newline at end of file ...\ No newline at end of file