최지우

Add code

import cv2
import numpy as np
import serial

# Receive the webcam signal
VideoSignal = cv2.VideoCapture(0)
# Load the YOLO weights and cfg files
YOLO_net = cv2.dnn.readNet('yolov3-tiny_last.weights', 'yolov3-tiny.cfg')

# Rebuild the YOLO network: class names and output layers
classes = []
with open("obj.names", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = YOLO_net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in YOLO_net.getUnconnectedOutLayers()]

while True:
    # Read a webcam frame
    ret, frame = VideoSignal.read()
    h, w, c = frame.shape

    # YOLO input
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0),
                                 True, crop=False)
    YOLO_net.setInput(blob)
    outs = YOLO_net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.3:
                # Object detected
                center_x = int(detection[0] * w)
                center_y = int(detection[1] * h)
                dw = int(detection[2] * w)
                dh = int(detection[3] * h)
                # Rectangle coordinates
                x = int(center_x - dw / 2)
                y = int(center_y - dh / 2)
                boxes.append([x, y, dw, dh])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.45, 0.4)

    for i in range(len(boxes)):
        if i in indexes:
            x, y, w, h = boxes[i]
            label = 'box'
            score = confidences[i]
            # Draw the bounding box and class label on the frame
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 0, 255), 5)
            cv2.putText(frame, label, (x, y - 20), cv2.FONT_ITALIC, 0.5,
                        (255, 255, 255), 1)

    cv2.imshow("YOLOv3", frame)
    if cv2.waitKey(100) > 0:
        break

VideoSignal.release()
cv2.destroyAllWindows()
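A portability note on the script above (general OpenCV behavior, not something specific to this repo): layer_names[i[0] - 1] assumes getUnconnectedOutLayers() returns nested single-element arrays, as it did before OpenCV 4.5.4; newer versions return a flat array, so i[0] raises an IndexError on a scalar. A version-agnostic sketch:

def get_output_layers(net):
    layer_names = net.getLayerNames()
    # flatten() tolerates both the old nested ([[200], [227]]) and the
    # new flat ([200, 227]) return shapes of getUnconnectedOutLayers().
    ids = np.array(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[i - 1] for i in ids]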
from IPython.display import display
from PIL import Image
from yolo import YOLO
import cvlib as cv
from cvlib.object_detection import draw_bbox
import cv2
import tensorflow.compat.v1.keras.backend as K
import tensorflow as tf

tf.compat.v1.disable_eager_execution()


def objectDetection(file, model_path, class_path):
    yolo = YOLO(model_path=model_path, classes_path=class_path,
                anchors_path='model_data/tiny_yolo_anchors.txt')
    image = Image.open(file)
    result_image = yolo.detect_image(image)
    result_image.save('test.jpg', 'JPEG')


webcam = cv2.VideoCapture(0)
if not webcam.isOpened():
    print("Could not open webcam")
    exit()

yolo = YOLO(model_path='model_data/yolo_tiny_best.h5',
            classes_path='data/box/classes.txt',
            anchors_path='model_data/tiny_yolo_anchors.txt')

# loop through frames
while webcam.isOpened():
    status, frame = webcam.read()
    if not status:
        break
    '''
    cv2.imwrite('frame.jpg', frame)
    objectDetection('frame.jpg', 'model_data/yolo_tiny_best.h5', 'data/box/classes.txt')
    out = cv2.imread('test.jpg')
    '''
    cv2.imwrite('frame.jpg', frame)
    tst = Image.open('frame.jpg')
    out = yolo.detect_image(tst)
    out.save('test.jpg', 'JPEG')
    out = cv2.imread('test.jpg')
    cv2.imshow("Real-time object detection", out)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

webcam.release()
cv2.destroyAllWindows()
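Round-tripping every frame through frame.jpg and test.jpg on disk is slow. A sketch of the same loop body converting between OpenCV and PIL in memory instead (this assumes yolo.detect_image returns a PIL image, as it does in yolo.py below):

import numpy as np
# Convert the BGR OpenCV frame to a PIL RGB image, detect, convert back.
pil_in = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
pil_out = yolo.detect_image(pil_in)
out = cv2.cvtColor(np.asarray(pil_out), cv2.COLOR_RGB2BGR)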
import serial
import cv2
import numpy as np
import ocr_form

ser = serial.Serial('/dev/ttyAMA0', 115200)
if ser.isOpen():
    print("Serial Communication in operation")

LiveCam = cv2.VideoCapture(0)
YOLO_net = cv2.dnn.readNet('yolov3-tiny_best.weights', 'yolov3-tiny.cfg')
classes = ['box']
layer_names = YOLO_net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in YOLO_net.getUnconnectedOutLayers()]

frame_num = 0
while LiveCam.isOpened():
    ret, frame = LiveCam.read()
    if ret is False:
        print("No Video Input")
        break
    if frame_num != 20:
        frame_num += 1
    elif frame_num == 20:
        # Run detection on every 20th frame only
        frame_num = 0
        h, w, c = frame.shape
        blob = cv2.dnn.blobFromImage(frame, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        YOLO_net.setInput(blob)
        outs = YOLO_net.forward(output_layers)

        class_ids = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.4:
                    center_x = int(detection[0] * w)
                    center_y = int(detection[1] * h)
                    dw = int(detection[2] * w)
                    dh = int(detection[3] * h)
                    x = int(center_x - dw / 2)
                    y = int(center_y - dh / 2)
                    boxes.append([x, y, dw, dh])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.45, 0.4)

        if confidences:
            # Steer toward the highest-confidence box; x = 320 is the
            # center line, assuming a 640-px-wide capture.
            bestscore = confidences.index(max(confidences))
            best_x, best_y, best_w, best_h = boxes[bestscore]
            if best_x > 320:
                print("Move right")
                ser.write(serial.to_bytes([int('1', 16)]))
            elif best_x + best_w < 320:
                print("Move left")
                ser.write(serial.to_bytes([int('2', 16)]))
            else:
                print("Go straight")
                cv2.imwrite('cap_img.jpg', frame)
                ser.write(serial.to_bytes([int('3', 16)]))
                break
            cv2.rectangle(frame, (best_x, best_y), (best_x + best_w, best_y + best_h), (0, 0, 255), 5)
            cv2.putText(frame, 'box', (best_x, best_y - 20), cv2.FONT_ITALIC, 0.5, (255, 255, 255), 1)

    cv2.imshow("YOLOv3", frame)
    if cv2.waitKey(100) > 0:
        break

LiveCam.release()
cv2.destroyAllWindows()

'''
image = cv2.imread("cap_img.jpg")
template = cv2.imread("myform.jpg")
ocr_result = ocr_form.ocr(image, template)
(name, result) = ocr_result["name"]
(address, result) = ocr_result["address"]
(detail_address, result) = ocr_result["detail_address"]
name = name.replace(" ", "")
address = address.replace(" ", "")
detail_address = detail_address.replace(" ", "")
print(name)
print(address)
print(detail_address)
'''
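A side note on the serial commands above: the 0x1/0x2/0x3 codes come straight from the script, and pyserial's write() accepts plain bytes literals, so the serial.to_bytes calls can be written more directly. A minimal sketch:

# Command bytes used by the script above (meanings from its print statements).
CMD_RIGHT = b'\x01'    # steer right
CMD_LEFT = b'\x02'     # steer left
CMD_FORWARD = b'\x03'  # target centered: capture a frame, drive forward, stop the loop

ser.write(CMD_RIGHT)   # equivalent to ser.write(serial.to_bytes([int('1', 16)]))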
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
10,14, 23,27, 37,58, 81,82, 135,169, 344,319
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes = 1
train = data/train.txt
valid = data/train.txt
names = data/obj.names
backup = backup/
box
import rotate_image
from collections import namedtuple
import pytesseract
import argparse
import imutils
import cv2


def ocr(image, template):
    print("[Loading...] OCR Location Setting")
    OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "filter_keywords"])
    OCR_Locations = [
        OCRLocation("name", (27, 96, 60, 20), []),
        OCRLocation("address", (27, 115, 276, 21), []),
        OCRLocation("detail_address", (28, 134, 409, 36), []),
    ]

    print("[Loading...] aligning images")
    aligned = rotate_image.rotate_image(image, template)

    print("[Loading...] Proceeding OCR")
    parsingResults = []
    for loc in OCR_Locations:
        (x, y, w, h) = loc.bbox
        roi = aligned[y:y+h, x:x+w]
        cv2.imshow(loc.id, roi)
        cv2.waitKey(0)
        rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
        text = pytesseract.image_to_string(rgb, lang='Hangul')
        for line in text.split("\n"):
            if len(line) == 0:
                continue
            lower = line.lower()
            count = sum([lower.count(x) for x in loc.filter_keywords])
            if count == 0:
                parsingResults.append((loc, line))

    results = {}
    for (loc, line) in parsingResults:
        r = results.get(loc.id, None)
        if r is None:
            results[loc.id] = (line, loc._asdict())
        else:
            (existingText, loc) = r
            text = "{}\n{}".format(existingText, line)
            results[loc["id"]] = (text, loc)

    for (locID, result) in results.items():
        (text, loc) = result
        print(loc["id"])
        print("=" * len(loc["id"]))
        print("{}\n".format(text))

    cv2.imshow("Input", imutils.resize(image))
    cv2.imshow("Output", imutils.resize(aligned))
    cv2.waitKey(0)
    return results
import cv2
import numpy as np
import ocr_form
image = cv2.imread("myimg1.png")
template = cv2.imread("myform.jpg")
ocr_result = ocr_form.ocr(image, template)
(name, result) = ocr_result["name"]
(address, result) = ocr_result["address"]
(detail_address, result) = ocr_result["detail_address"]
name = name.replace(" ","")
address = address.replace(" ","")
detail_address = detail_address.replace(" ","")
print(name)
print(address)
print(detail_address)
# import necessary packages
import cvlib as cv
from cvlib.object_detection import draw_bbox
import cv2

# open webcam
webcam = cv2.VideoCapture(0)
if not webcam.isOpened():
    print("Could not open webcam")
    exit()

# loop through frames
while webcam.isOpened():
    # read frame from webcam
    status, frame = webcam.read()
    if not status:
        break
    # apply object detection
    bbox, label, conf = cv.detect_common_objects(frame)
    # draw bounding box over detected objects
    out = draw_bbox(frame, bbox, label, conf, write_conf=True)
    # display output
    cv2.imshow("Real-time object detection", out)
    # press "Q" to stop
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# release resources
webcam.release()
cv2.destroyAllWindows()
import numpy as np
import imutils
import cv2


def rotate_image(image, template, maxFeatures=500, keepPercent=0.2, debug=False):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_template = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    # Detect ORB keypoints and compute descriptors on both images
    orb = cv2.ORB_create(maxFeatures)
    (kpsA, descsA) = orb.detectAndCompute(gray_image, None)
    (kpsB, descsB) = orb.detectAndCompute(gray_template, None)

    # Brute-force Hamming matching, keeping only the best keepPercent matches
    method = cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
    matcher = cv2.DescriptorMatcher_create(method)
    matches = matcher.match(descsA, descsB, None)
    matches = sorted(matches, key=lambda x: x.distance)
    keep = int(len(matches) * keepPercent)
    matches = matches[:keep]

    if debug:
        matchedVis = cv2.drawMatches(image, kpsA, template, kpsB, matches, None)
        matchedVis = imutils.resize(matchedVis, width=1000)
        cv2.imshow("Matched Keypoints", matchedVis)
        cv2.waitKey(0)

    ptsA = np.zeros((len(matches), 2), dtype=float)
    ptsB = np.zeros((len(matches), 2), dtype=float)
    for (i, m) in enumerate(matches):
        ptsA[i] = kpsA[m.queryIdx].pt
        ptsB[i] = kpsB[m.trainIdx].pt

    # Estimate the homography mapping the input onto the template, then warp
    (H, mask) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)
    (h, w) = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))
    return aligned
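One fragility worth noting in rotate_image(): cv2.findHomography needs at least four point correspondences, so on a frame where ORB finds too few matches the call fails (or H comes back None) and warpPerspective cannot run. A small guard that could be added right after the match list is trimmed, shown as a sketch rather than as part of the original file:

if len(matches) < 4:
    # Too few correspondences to estimate a homography; let the caller decide.
    raise ValueError("not enough ORB matches to align image to template")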
# -*- coding: utf-8 -*-
"""
Class definition of YOLO_v3 style detection model on image and video
"""

import colorsys
import os
from timeit import default_timer as timer

import numpy as np
from keras.models import load_model
from keras.layers import Input
from keras.utils import multi_gpu_model
from PIL import Image, ImageFont, ImageDraw
from tensorflow.compat.v1.keras import backend as K

from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image


class YOLO(object):
    _defaults = {
        "model_path": 'model_data/yolo.h5',
        "anchors_path": 'model_data/yolo_anchors.txt',
        "classes_path": 'model_data/coco_classes.txt',
        "score": 0.3,
        "iou": 0.45,
        "model_image_size": (416, 416),
        "gpu_num": 1,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)  # and update with user overrides
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)

    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Load model, or construct model and load weights.
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)
        is_tiny_version = num_anchors == 6  # default setting
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except:
            self.yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)), num_anchors//2, num_classes) \
                if is_tiny_version else yolo_body(Input(shape=(None, None, 3)), num_anchors//3, num_classes)
            self.yolo_model.load_weights(self.model_path)  # make sure model, anchors and classes match
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # Generate colors for drawing bounding boxes.
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
        np.random.seed(10101)  # Fixed seed for consistent colors across runs.
        np.random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
        np.random.seed(None)  # Reset seed to default.

        # Generate output tensor targets for filtered bounding boxes.
        self.input_image_shape = K.placeholder(shape=(2, ))
        if self.gpu_num >= 2:
            self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                                           len(self.class_names), self.input_image_shape,
                                           score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes

    def detect_image(self, image):
        start = timer()

        if self.model_image_size != (None, None):
            assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')

        print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)  # Add batch dimension.

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })

        print('Found {} boxes for {}'.format(len(out_boxes), 'img'))

        font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                                  size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
        thickness = (image.size[0] + image.size[1]) // 300

        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            box = out_boxes[i]
            score = out_scores[i]

            label = '{} {:.2f}'.format(predicted_class, score)
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)

            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
            print(label, (left, top), (right, bottom))

            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])
            else:
                text_origin = np.array([left, top + 1])

            # My kingdom for a good redistributable image drawing library.
            for i in range(thickness):
                draw.rectangle(
                    [left + i, top + i, right - i, bottom - i],
                    outline=self.colors[c])
            draw.rectangle(
                [tuple(text_origin), tuple(text_origin + label_size)],
                fill=self.colors[c])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
            del draw

        end = timer()
        print(end - start)
        return image

    def close_session(self):
        self.sess.close()


def detect_video(yolo, video_path, output_path=""):
    import cv2
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError("Couldn't open webcam or video")
    video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
    video_fps = vid.get(cv2.CAP_PROP_FPS)
    video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    isOutput = True if output_path != "" else False
    if isOutput:
        print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
        out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    while True:
        return_value, frame = vid.read()
        if not return_value:  # stop cleanly when the stream ends
            break
        image = Image.fromarray(frame)
        image = yolo.detect_image(image)
        result = np.asarray(image)
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=0.50, color=(255, 0, 0), thickness=2)
        cv2.namedWindow("result", cv2.WINDOW_NORMAL)
        cv2.imshow("result", result)
        if isOutput:
            out.write(result)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    yolo.close_session()
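A minimal way to run this detector on a saved clip, assuming hypothetical file names (input.mp4 and output.avi are placeholders, not paths from this repo):

if __name__ == '__main__':
    # YOLO() uses the _defaults paths unless overridden with keyword arguments.
    detect_video(YOLO(), 'input.mp4', output_path='output.avi')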
[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=8
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=1
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear
[yolo]
mask = 3,4,5
anchors = 54,245, 53,266, 60,253, 58,271, 62,270, 66,258, 68,280, 73,266
classes=1
num=8
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=18
activation=linear
[yolo]
mask = 0,1,2
anchors = 54,245, 53,266, 60,253, 58,271, 62,270, 66,258, 68,280, 73,266
classes=1
num=8
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
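As a consistency check on this cfg (the standard darknet rule for YOLO heads, stated here as general knowledge rather than something documented in this repo): the filters value of the convolutional layer immediately before each [yolo] section must equal (classes + 5) * len(mask). A quick check in Python:

classes = 1
anchors_per_head = 3              # each [yolo] mask above lists three anchor indices
filters = (classes + 5) * anchors_per_head
print(filters)                    # 18, matching filters=18 before both [yolo] layers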