menu_crawling

김윤지
Commit 9132868136609a29c6d5323ee086945619d14f47 91328681 1 parent a1a60a03
Showing 1 changed file with 123 additions and 0 deletions
menu_python_crawling/kakaoMap_menu_crawling.py
--- a/menu_python_crawling/kakaoMap_menu_crawling.py 0 → 100644
View file @9132868
+++ b/menu_python_crawling/kakaoMap_menu_crawling.py 0 → 100644
View file @9132868
+ import os
+ from time import sleep
+ 
+ from selenium import webdriver
+ from selenium.webdriver.common.keys import Keys
+ from selenium.common.exceptions import ElementNotInteractableException
+ from bs4 import BeautifulSoup
+ from selenium.webdriver.common.by import By
+ 
+ # Windows terminal UTF-8 encoding: run `chcp 65001`
+ 
+ ##########################################################################
+ ##################### variable related selenium ##########################
+ ##########################################################################
+ options = webdriver.ChromeOptions()
+ options.add_argument('headless')
+ options.add_argument('lang=ko_KR')
+ chromedriver_path = "chromedriver"
+ driver = webdriver.Chrome(os.path.join(os.getcwd(), chromedriver_path), options=options)  # chromedriver 열기
+ 
+ 
+ def main():
+     global driver, menu_wb
+ 
+     driver.implicitly_wait(4)  # 렌더링 될때까지 기다린다 4초
+     driver.get('https://map.kakao.com/')  # 주소 가져오기
+ 
+     search("영통역맛집")
+ 
+     driver.quit()
+     print("finish")
+ 
+ 
+ def search(place):
+     global driver
+ 
+     search_area = driver.find_element(By.XPATH,'//*[@id="search.keyword.query"]')  # 검색 창
+     search_area.send_keys(place)  # 검색어 입력
+     driver.find_element(By.XPATH,'//*[@id="search.keyword.submit"]').send_keys(Keys.ENTER)  # Enter로 검색
+     sleep(1)
+ 
+     # 검색된 정보가 있는 경우에만 탐색
+     # 1번 페이지 place list 읽기
+     html = driver.page_source
+ 
+     soup = BeautifulSoup(html, 'html.parser')
+     place_lists = soup.select('.placelist > .PlaceItem') # 검색된 장소 목록
+ 
+     # 검색된 첫 페이지 장소 목록 크롤링하기
+     crawling(place_lists)
+     search_area.clear()
+ 
+     # 우선 더보기 클릭해서 2페이지
+     try:
+         driver.find_element(By.XPATH,'//*[@id="info.search.place.more"]').send_keys(Keys.ENTER)
+         sleep(1)
+ 
+         # 2~ 5페이지 읽기
+         for i in range(2, 6):
+             # 페이지 넘기기
+             xPath = '//*[@id="info.search.page.no' + str(i) + '"]'
+             driver.find_element(By.XPATH,xPath).send_keys(Keys.ENTER)
+             sleep(1)
+ 
+             html = driver.page_source
+             soup = BeautifulSoup(html, 'html.parser')
+             place_lists = soup.select('.placelist > .PlaceItem') # 장소 목록 list
+ 
+             crawling(place_lists)
+ 
+     except ElementNotInteractableException:
+         print('not found')
+     finally:
+         search_area.clear()
+ 
+ def crawling(placeLists):
+     for i, place in enumerate(placeLists):
+         menuInfos = getMenuInfo(i, driver)
+         print(menuInfos)
+ 
+ def getMenuInfo(i, driver):
+     # 상세페이지로 가서 메뉴찾기
+     detail_page_xpath = '//*[@id="info.search.place.list"]/li[' + str(i + 1) + ']/div[5]/div[4]/a[1]'
+     driver.find_element(By.XPATH,detail_page_xpath).send_keys(Keys.ENTER)
+     driver.switch_to.window(driver.window_handles[-1])  # 상세정보 탭으로 변환
+     sleep(1)
+ 
+     menuInfos = []
+     html = driver.page_source
+     soup = BeautifulSoup(html, 'html.parser')
+ 
+     # 메뉴의 3가지 타입
+     menuonlyType = soup.select('.cont_menu > .list_menu > .menuonly_type')
+     nophotoType = soup.select('.cont_menu > .list_menu > .nophoto_type')
+     photoType = soup.select('.cont_menu > .list_menu > .photo_type')
+ 
+     if len(menuonlyType) != 0:
+         for menu in menuonlyType:
+             menuInfos.append(_getMenuInfo(menu))
+     elif len(nophotoType) != 0:
+         for menu in nophotoType:
+             menuInfos.append(_getMenuInfo(menu))
+     else:
+         for menu in photoType:
+             menuInfos.append(_getMenuInfo(menu))
+ 
+     driver.close()
+     driver.switch_to.window(driver.window_handles[0])  # 검색 탭으로 전환
+ 
+     return menuInfos
+ 
+ def _getMenuInfo(menu):
+     menuName = menu.select('.info_menu > .loss_word')[0].text
+     menuPrices = menu.select('.info_menu > .price_menu')
+     menuPrice = ''
+ 
+     if len(menuPrices) != 0:
+         menuPrice =  menuPrices[0].text.split(' ')[1]
+ 
+     return [menuName, menuPrice]
+ 
+ # Call main() only when this file is executed as a script.
+ if __name__ == "__main__":
+     main()
\ No newline at end of file