Showing 21 changed files with 895 additions and 0 deletions
Code/detection.py
0 → 100644
import cv2
import numpy as np

# Open the webcam stream
VideoSignal = cv2.VideoCapture(0)
# Load the YOLO weights and cfg files
YOLO_net = cv2.dnn.readNet('yolov3-tiny_last.weights', 'yolov3-tiny.cfg')

# Rebuild the YOLO network output layout
classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = YOLO_net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in YOLO_net.getUnconnectedOutLayers()]

while True:
    # Read a webcam frame
    ret, frame = VideoSignal.read()
    if not ret:
        break
    h, w, c = frame.shape

    # Build the YOLO input blob (scale 1/255, 416x416, BGR->RGB swap)
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0),
                                 True, crop=False)
    YOLO_net.setInput(blob)
    outs = YOLO_net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > 0.3:
                # Object detected
                center_x = int(detection[0] * w)
                center_y = int(detection[1] * h)
                dw = int(detection[2] * w)
                dh = int(detection[3] * h)
                # Rectangle coordinates
                x = int(center_x - dw / 2)
                y = int(center_y - dh / 2)
                boxes.append([x, y, dw, dh])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.45, 0.4)

    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = 'box'
            score = confidences[i]

            # Draw the bounding box and class label on the frame
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 5)
            cv2.putText(frame, label, (x, y - 20), cv2.FONT_ITALIC, 0.5,
                        (255, 255, 255), 1)

    cv2.imshow("YOLOv3", frame)

    if cv2.waitKey(100) > 0:
        break

VideoSignal.release()
cv2.destroyAllWindows()
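A portability note: the output_layers line assumes the older OpenCV behaviour where getUnconnectedOutLayers() returns an Nx1 array, so the i[0] - 1 indexing works; recent OpenCV releases return a flat 1-D array and that indexing raises an error. A minimal version-agnostic sketch:

import numpy as np

def get_output_layers(net):
    # flatten() handles both the old Nx1 and the new 1-D return shapes
    layer_names = net.getLayerNames()
    ids = np.array(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[i - 1] for i in ids]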
Code/keras_test.py
0 → 100644
from IPython.display import display
from PIL import Image
from yolo import YOLO
import cvlib as cv
from cvlib.object_detection import draw_bbox
import cv2
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf
tf.compat.v1.disable_eager_execution()

def objectDetection(file, model_path, class_path):
    yolo = YOLO(model_path=model_path, classes_path=class_path, anchors_path='model_data/tiny_yolo_anchors.txt')
    image = Image.open(file)
    result_image = yolo.detect_image(image)
    result_image.save('test.jpg', 'JPEG')

webcam = cv2.VideoCapture(0)

if not webcam.isOpened():
    print("Could not open webcam")
    exit()

yolo = YOLO(model_path='model_data/yolo_tiny_best.h5', classes_path='data/box/classes.txt', anchors_path='model_data/tiny_yolo_anchors.txt')

# loop through frames
while webcam.isOpened():
    status, frame = webcam.read()

    if not status:
        break
    '''
    cv2.imwrite('frame.jpg', frame)
    objectDetection('frame.jpg', 'model_data/yolo_tiny_best.h5', 'data/box/classes.txt')
    out = cv2.imread('test.jpg')
    '''
    # Round-trip through JPEG files: save the frame, run detection on the
    # saved image, then reload the annotated result for display
    cv2.imwrite('frame.jpg', frame)
    tst = Image.open('frame.jpg')
    out = yolo.detect_image(tst)
    out.save('test.jpg', 'JPEG')
    out = cv2.imread('test.jpg')
    cv2.imshow("Real-time object detection", out)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

webcam.release()
cv2.destroyAllWindows()
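The loop above writes each frame to frame.jpg and reads the annotated result back from test.jpg, which costs two JPEG encode/decode round-trips per frame. A minimal in-memory sketch of the same step, assuming the YOLO.detect_image() interface from yolo.py (OpenCV frames are BGR, PIL works in RGB):

from PIL import Image
import cv2
import numpy as np

def detect_frame(yolo, frame):
    # BGR (OpenCV) -> RGB (PIL), run detection, then back to BGR for imshow
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    annotated = yolo.detect_image(Image.fromarray(rgb))
    return cv2.cvtColor(np.asarray(annotated), cv2.COLOR_RGB2BGR)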
Code/main.py
0 → 100644
import serial
import cv2
import numpy as np
import ocr_image

ser = serial.Serial('/dev/ttyAMA0', 115200)
if ser.isOpen():
    print("Serial communication in operation")

LiveCam = cv2.VideoCapture(0)
YOLO_net = cv2.dnn.readNet('yolov3-tiny_best.weights', 'yolov3-tiny.cfg')

classes = ['box']
layer_names = YOLO_net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in YOLO_net.getUnconnectedOutLayers()]

frame_num = 0

while LiveCam.isOpened():
    ret, frame = LiveCam.read()
    if not ret:
        print("No video input")
        break
    # Frame counter cycling from 0 to 20
    if frame_num != 20:
        frame_num += 1
    elif frame_num == 20:
        frame_num = 0

    h, w, c = frame.shape

    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
    YOLO_net.setInput(blob)
    outs = YOLO_net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > 0.4:
                center_x = int(detection[0] * w)
                center_y = int(detection[1] * h)
                dw = int(detection[2] * w)
                dh = int(detection[3] * h)
                x = int(center_x - dw / 2)
                y = int(center_y - dh / 2)
                boxes.append([x, y, dw, dh])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.45, 0.4)

    if confidences:
        # Steer toward the highest-confidence detection, using the frame's
        # horizontal midpoint (320 px at 640 px width) as the reference
        bestscore = confidences.index(max(confidences))
        best_x, best_y, best_w, best_h = boxes[bestscore]

        if best_x > 320:
            print("Move right")
            ser.write(serial.to_bytes([int('1', 16)]))

        elif best_x + best_w < 320:
            print("Move left")
            ser.write(serial.to_bytes([int('2', 16)]))

        else:
            print("Go straight")
            cv2.imwrite('cap_img.jpg', frame)
            ser.write(serial.to_bytes([int('3', 16)]))
            break

        cv2.rectangle(frame, (best_x, best_y), (best_x + best_w, best_y + best_h), (0, 0, 255), 5)
        cv2.putText(frame, 'box', (best_x, best_y - 20), cv2.FONT_ITALIC, 0.5, (255, 255, 255), 1)

    cv2.imshow("YOLOv3", frame)

    if cv2.waitKey(100) > 0:
        break
'''
image = cv2.imread("cap_img.jpg")
template = cv2.imread("myform.jpg")

ocr_result = ocr_image.ocr(image, template)

(name, result) = ocr_result["name"]
(address, result) = ocr_result["address"]
(detail_address, result) = ocr_result["detail_address"]

name = name.replace(" ", "")
address = address.replace(" ", "")
detail_address = detail_address.replace(" ", "")

print(name)
print(address)
print(detail_address)
'''
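The steering logic maps the highest-confidence box against the frame's 320 px midline to one-byte serial commands: 0x01 = move right, 0x02 = move left, 0x03 = go straight (capture the frame and stop). A sketch of the same decision as a standalone, testable function (the function name is hypothetical):

def steering_command(x, w, midline=320):
    """Return the command byte for a box with left edge x and width w."""
    if x > midline:           # entire box lies right of the midline
        return 0x01           # move right
    if x + w < midline:       # entire box lies left of the midline
        return 0x02           # move left
    return 0x03               # box straddles the midline: go straight

assert steering_command(400, 50) == 0x01
assert steering_command(100, 50) == 0x02
assert steering_command(300, 60) == 0x03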
Code/model_data/box_yolo.h5
0 → 100644
This file is too large to display.
Code/model_data/coco_classes.txt
0 → 100644
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
Code/model_data/tiny_yolo_anchors.txt
0 → 100644
10,14,  23,27,  37,58,  81,82,  135,169,  344,319
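The file holds six width,height anchor pairs (in pixels at the 416x416 network input) on a single line. yolo.py's _get_anchors() parses it into a 6x2 array; a trimmed sketch of that same parsing:

import numpy as np

line = "10,14,  23,27,  37,58,  81,82,  135,169,  344,319"
anchors = np.array([float(x) for x in line.split(',')]).reshape(-1, 2)
# 6 rows of (width, height); tiny YOLO uses three anchors per output scale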
Code/model_data/voc_classes.txt
0 → 100644
Code/model_data/yolo_anchors.txt
0 → 100644
10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
Code/model_data/yolo_tiny.h5
0 → 100644
This file is too large to display.
Code/model_data/yolo_tiny_best.h5
0 → 100644
This file is too large to display.
Code/myform.jpg
0 → 100644

212 KB
Code/obj.data
0 → 100644
Code/obj.names
0 → 100644
box
\ No newline at end of file
Code/ocr_image.py
0 → 100644
import rotate_image
from collections import namedtuple
import pytesseract
import imutils
import cv2

def ocr(image, template):
    print("[Loading...] Setting OCR locations")

    OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "filter_keywords"])

    # Bounding boxes (x, y, w, h) of each field on the aligned form
    OCR_Locations = [
        OCRLocation("name", (27, 96, 60, 20), []),
        OCRLocation("address", (27, 115, 276, 21), []),
        OCRLocation("detail_address", (28, 134, 409, 36), []),
    ]

    print("[Loading...] Aligning images")
    aligned = rotate_image.rotate_image(image, template)

    print("[Loading...] Running OCR")
    parsingResults = []

    for loc in OCR_Locations:
        (x, y, w, h) = loc.bbox
        roi = aligned[y:y+h, x:x+w]
        cv2.imshow(loc.id, roi)
        cv2.waitKey(0)

        rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        # 'Hangul' must match an installed Tesseract traineddata name
        # (the stock Korean language pack is named 'kor')
        text = pytesseract.image_to_string(rgb, lang='Hangul')

        for line in text.split("\n"):
            if len(line) == 0:
                continue

            lower = line.lower()
            count = sum([lower.count(x) for x in loc.filter_keywords])

            if count == 0:
                parsingResults.append((loc, line))

    results = {}

    for (loc, line) in parsingResults:
        r = results.get(loc.id, None)

        if r is None:
            results[loc.id] = (line, loc._asdict())

        else:
            # Append additional lines recognised for the same field
            (existingText, loc) = r
            text = "{}\n{}".format(existingText, line)

            results[loc["id"]] = (text, loc)

    for (locID, result) in results.items():
        (text, loc) = result

        print(loc["id"])
        print("=" * len(loc["id"]))
        print("{}\n".format(text))

    cv2.imshow("Input", imutils.resize(image))
    cv2.imshow("Output", imutils.resize(aligned))
    cv2.waitKey(0)

    return results
Code/ocrtest.py
0 → 100644
import cv2
import numpy as np
import ocr_image

image = cv2.imread("myimg1.png")
template = cv2.imread("myform.jpg")

ocr_result = ocr_image.ocr(image, template)

(name, result) = ocr_result["name"]
(address, result) = ocr_result["address"]
(detail_address, result) = ocr_result["detail_address"]

# Strip whitespace from the recognised fields
name = name.replace(" ", "")
address = address.replace(" ", "")
detail_address = detail_address.replace(" ", "")

print(name)
print(address)
print(detail_address)
Code/opencvlib.py
0 → 100644
# import necessary packages
import cvlib as cv
from cvlib.object_detection import draw_bbox
import cv2

# open webcam
webcam = cv2.VideoCapture(0)

if not webcam.isOpened():
    print("Could not open webcam")
    exit()

# loop through frames
while webcam.isOpened():

    # read frame from webcam
    status, frame = webcam.read()

    if not status:
        break

    # apply object detection
    bbox, label, conf = cv.detect_common_objects(frame)

    # draw bounding boxes over detected objects
    out = draw_bbox(frame, bbox, label, conf, write_conf=True)

    # display output
    cv2.imshow("Real-time object detection", out)

    # press "Q" to stop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release resources
webcam.release()
cv2.destroyAllWindows()
Code/rotate_image.py
0 → 100644
import numpy as np
import imutils
import cv2

def rotate_image(image, template, maxFeatures=500, keepPercent=0.2, debug=False):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    # Detect ORB keypoints and compute binary descriptors in both images
    orb = cv2.ORB_create(maxFeatures)
    (kpsA, descsA) = orb.detectAndCompute(gray_image, None)
    (kpsB, descsB) = orb.detectAndCompute(gray_template, None)

    # Brute-force Hamming matching suits ORB's binary descriptors
    method = cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
    matcher = cv2.DescriptorMatcher_create(method)
    matches = matcher.match(descsA, descsB, None)

    # Keep only the best keepPercent of matches by distance
    matches = sorted(matches, key=lambda x: x.distance)
    keep = int(len(matches) * keepPercent)
    matches = matches[:keep]

    if debug:
        matchedVis = cv2.drawMatches(image, kpsA, template, kpsB, matches, None)
        matchedVis = imutils.resize(matchedVis, width=1000)
        cv2.imshow("Matched Keypoints", matchedVis)
        cv2.waitKey(0)

    ptsA = np.zeros((len(matches), 2), dtype=float)
    ptsB = np.zeros((len(matches), 2), dtype=float)

    for (i, m) in enumerate(matches):
        ptsA[i] = kpsA[m.queryIdx].pt
        ptsB[i] = kpsB[m.trainIdx].pt

    # Estimate the homography with RANSAC and warp the image onto the template
    (H, mask) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)

    (h, w) = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))

    return aligned
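A minimal usage sketch, reusing the image and template paths from ocrtest.py; debug=True shows the kept ORB matches before warping (the output filename is illustrative):

import cv2
import rotate_image

image = cv2.imread("myimg1.png")
template = cv2.imread("myform.jpg")
aligned = rotate_image.rotate_image(image, template, debug=True)
cv2.imwrite("aligned.jpg", aligned)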
Code/yolo.py
0 → 100644
# -*- coding: utf-8 -*-
"""
Class definition of YOLO_v3 style detection model on image and video
"""

import colorsys
import os
from timeit import default_timer as timer

import numpy as np
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from tensorflow.compat.v1.keras import backend as K

from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image
from keras.utils import multi_gpu_model

class YOLO(object):
    _defaults = {
        "model_path": 'model_data/yolo.h5',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "score" : 0.3,
        "iou" : 0.45,
        "model_image_size" : (416, 416),
        "gpu_num" : 1,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)  # and update with user overrides
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)

    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load model, or construct model and load weights.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        is_tiny_version = num_anchors==6  # default setting
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except:
            self.yolo_model = tiny_yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) \
                if is_tiny_version else yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
            self.yolo_model.load_weights(self.model_path)  # make sure model, anchors and classes match
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2, ))
        if self.gpu_num>=2:
            self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                len(self.class_names), self.input_image_shape,
                score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    def detect_image(self, image):
        start = timer()

        if self.model_image_size != (None, None):
            assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                    size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        end = timer()
        print(end - start)
        return image

    def close_session(self):
        self.sess.close()

def detect_video(yolo, video_path, output_path=""):
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    while True:
        return_value, frame = vid.read()
        if not return_value:
            break  # end of stream or read failure
        image = Image.fromarray(frame)
        image = yolo.detect_image(image)
        result = np.asarray(image)
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50, color=(255, 0, 0), thickness=2)
        cv2.namedWindow("result", cv2.WINDOW_NORMAL)
        cv2.imshow("result", result)
        if isOutput:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    yolo.close_session()
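A minimal driver sketch for detect_video, assuming the tiny-model paths used in keras_test.py (passing 0 as video_path opens the default webcam; an empty output_path skips file writing):

from yolo import YOLO, detect_video

yolo = YOLO(model_path='model_data/yolo_tiny_best.h5',
            classes_path='data/box/classes.txt',
            anchors_path='model_data/tiny_yolo_anchors.txt')
detect_video(yolo, 0, output_path="")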
Code/yolov3-tiny.cfg
0 → 100644
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=8
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=1

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

###########

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear

[yolo]
mask = 3,4,5
anchors = 54,245, 53,266, 60,253, 58,271, 62,270, 66,258, 68,280, 73,266
classes=1
num=8
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1

[route]
layers = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[upsample]
stride=2

[route]
layers = -1, 8

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear

[yolo]
mask = 0,1,2
anchors = 54,245, 53,266, 60,253, 58,271, 62,270, 66,258, 68,280, 73,266
classes=1
num=8
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
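A consistency note on filters=18: each convolutional layer feeding a [yolo] head follows the YOLOv3 rule filters = (classes + 5) * len(mask). Here classes=1 and each mask selects three of the eight anchors, so the value checks out:

classes = 1
anchors_per_scale = 3                        # mask = 3,4,5 and mask = 0,1,2
filters = (classes + 5) * anchors_per_scale  # 4 box coords + 1 objectness + classes
assert filters == 18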
Code/yolov3-tiny_best.weights
0 → 100644
This file is too large to display.
Code/yolov3-tiny_last.weights
0 → 100644
This file is too large to display.