HyeonJun@khj MINGW64 ~/Desktop/2018-1-capstone_design_1-Automated_calculation_system (master)

$ git add * HyeonJun@khj MINGW64 ~/Desktop/2018-1-capstone_design_1-Automated_calculation_system (master) $ git commit -m

HyeonJun@khj MINGW64 ~/Desktop/2018-1-capstone_design_1-Automated_calculation_system (master)
$ git add * HyeonJun@khj MINGW64 ~/Desktop/2018-1-capstone_design_1-Automated_calculation_system (master) $ git commit -m
Hyunjun
Commit 721caa3ac5ded1574592a7357c93259982421561 721caa3a 1 parent 743df32b
Showing 2 changed files with 46 additions and 1 deletions
README.md
google_image_crwaling/google_image_crwaling.py
--- a/README.md
View file @721caa3
+++ b/README.md
View file @721caa3
@@ -39,4 +39,9 @@ python test 폴더에는 test에 필요하지 않은 train 부분을 삭제함
  5) params.txt : 속도향상을 위해 params.pkl파일을 params.txt로 변환
  6) main.cpp 
  *c++ 컴파일러 버전 11이상
-  *프로젝트 생성 시 sdl 검사 체크 해제
\ No newline at end of file
+  *프로젝트 생성 시 sdl 검사 체크 해제
+ 
+ 
+ 10. google_image_crwaling 코드 추가
+ 
+ 필요한 데이터셋을 만들기 위한 google_image_crwaling 코드 추가
\ No newline at end of file
--- a/google_image_crwaling/google_image_crwaling.py 0 → 100644
View file @721caa3
+++ b/google_image_crwaling/google_image_crwaling.py 0 → 100644
View file @721caa3
+ import requests
+ from lxml.html import parse
+ from io import StringIO
+ import os, sys
+ from PIL import Image
+ import urllib.request
+ 
+ 
+ # 검색할 이미지의 키워드 입력
+ keyword = input("검색할 이미지를 입력하세요 : ")
+ url = 'https://www.google.co.kr/search?q='+keyword+'&source=lnms&tbm=isch&sa=X&ved=0ahUKEwic-taB9IXVAhWDHpQKHXOjC14Q_AUIBigB&biw=1842&bih=990'
+ 
+  # html 소스 가져오기
+ text = requests.get(url).text
+ 
+ # html 문서로 파싱
+ text_source = StringIO(text)
+ parsed = parse(text_source)
+ 
+ # root node
+ doc = parsed.getroot()
+ 
+ # img 경로는 img 태그안에 src에 있음(20개 크롤링)
+ imgs = doc.findall('.//img')
+ 
+ img_list = []   # 이미지 경로가 담길 list
+ cnt=0
+ 
+ for a in imgs:
+     if cnt>0 and cnt<11:
+         img_list.append(a.get('src'))
+         image_url = a.get('src')
+         filename = keyword + str(cnt) + '.jpg'
+         fd = os.open(filename, os.O_WRONLY|os.O_BINARY|os.O_CREAT)
+         ud = urllib.request.urlopen(image_url)
+         binary = ud.read()
+         os.write(fd, binary)
+         os.close(fd)
+         print(filename + ' download complete')
+     cnt = cnt+1