# google_image_crwaling.py 1.09 KB
import os, sys
import urllib.parse
import urllib.request
from io import StringIO

import requests
from lxml.html import parse
from PIL import Image


# Script: ask for a keyword, fetch the Google image-search result page for it,
# and save up to MAX_IMAGES of the <img> sources found there as
# "<keyword><n>.jpg" in the current working directory.

MAX_IMAGES = 10        # number of <img> tags to download (the first tag is skipped)
REQUEST_TIMEOUT = 10   # seconds; without a timeout a stalled server hangs the script

# Ask for the search keyword (the prompt is user-facing text, left unchanged).
keyword = input("검색할 이미지를 입력하세요 : ")

# Percent-encode the keyword so characters such as '&', '#' or '+' cannot
# terminate or corrupt the q= parameter.
url = 'https://www.google.co.kr/search?q='+urllib.parse.quote_plus(keyword)+'&source=lnms&tbm=isch&sa=X&ved=0ahUKEwic-taB9IXVAhWDHpQKHXOjC14Q_AUIBigB&biw=1842&bih=990'

# Fetch the HTML source of the result page.
text = requests.get(url, timeout=REQUEST_TIMEOUT).text

# Parse the HTML text into a document tree.
text_source = StringIO(text)
parsed = parse(text_source)

# Root node of the parsed document.
doc = parsed.getroot()

# Image locations live in the src attribute of <img> tags.
imgs = doc.findall('.//img')

img_list = []   # resolved image URLs that the script attempted to download
cnt = 0

# The first <img> (index 0) is skipped, as in the original script; the next
# MAX_IMAGES tags are downloaded. File numbers follow the tag position, so
# a skipped or failed image leaves a gap rather than renumbering the rest.
for cnt, img in enumerate(imgs[1:MAX_IMAGES + 1], start=1):
    filename = keyword + str(cnt) + '.jpg'

    src = img.get('src')
    if not src:
        # Some <img> tags carry no src attribute; there is nothing to fetch.
        print(filename + ' skipped: no src attribute')
        continue

    # Resolve relative and protocol-relative paths against the page URL;
    # absolute URLs pass through unchanged.
    image_url = urllib.parse.urljoin(url, src)
    img_list.append(image_url)

    # Download before creating the file so a failed request does not leave
    # an empty file behind. URLError is an OSError subclass; ValueError
    # covers URLs with an unknown/unsupported form.
    try:
        with urllib.request.urlopen(image_url, timeout=REQUEST_TIMEOUT) as ud:
            binary = ud.read()
    except (OSError, ValueError) as err:
        print(filename + ' download failed: ' + str(err))
        continue

    # 'wb' is portable (os.O_BINARY exists only on Windows) and truncates any
    # existing file, so no stale bytes from an earlier, larger file remain.
    with open(filename, 'wb') as fd:
        fd.write(binary)
    print(filename + ' download complete')