ocr_image.py
1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import rotate_image
from collections import namedtuple
import pytesseract
import argparse
import imutils
import cv2
def ocr(image, template):
    """Run region-based OCR on a document image after aligning it to a template.

    The image is first deskewed/aligned against ``template`` via
    ``rotate_image.rotate_image``, then fixed bounding boxes (name, address,
    detail_address) are cropped and fed to Tesseract. Each ROI and the
    input/output images are shown in OpenCV windows (blocking on a key press),
    so this function is interactive.

    Parameters
    ----------
    image : numpy.ndarray
        BGR document photo to read.
    template : numpy.ndarray
        Reference image the photo is aligned against.

    Returns
    -------
    dict
        Maps field id -> (extracted text, field metadata dict).
    """
    print("[Loading...] OCR Location Setting")
    OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "filter_keywords"])
    # Fixed-position fields on the aligned template: (x, y, w, h) in pixels.
    locations = [
        OCRLocation("name", (27, 96, 60, 20), []),
        OCRLocation("address", (27, 115, 276, 21), []),
        OCRLocation("detail_address", (28, 134, 409, 36), []),
    ]

    print("[Loading...] aligning images")
    aligned = rotate_image.rotate_image(image, template)

    print("[Loading...] Proceeding OCR")
    parsed = []
    for location in locations:
        x, y, w, h = location.bbox
        crop = aligned[y:y + h, x:x + w]

        # Debug preview of each cropped field; blocks until a key is pressed.
        cv2.imshow(location.id, crop)
        cv2.waitKey(0)

        # Tesseract expects RGB ordering, OpenCV delivers BGR.
        rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
        # NOTE(review): 'Hangul' must match an installed traineddata file;
        # the standard Tesseract code for Korean is 'kor' — confirm.
        text = pytesseract.image_to_string(rgb, lang='Hangul')

        for line in text.split("\n"):
            if not line:
                continue
            low = line.lower()
            # Keep the line only when none of the filter keywords occur in it.
            if not any(kw in low for kw in location.filter_keywords):
                parsed.append((location, line))

    # Group recognized lines per field id, joining multi-line hits with "\n".
    results = {}
    for location, line in parsed:
        prev = results.get(location.id)
        if prev is None:
            results[location.id] = (line, location._asdict())
        else:
            old_text, meta = prev
            results[meta["id"]] = (f"{old_text}\n{line}", meta)

    # Print a small report: field id, underline, then the extracted text.
    for _field_id, (text, meta) in results.items():
        print(meta["id"])
        print("=" * len(meta["id"]))
        print(f"{text}\n")

    # NOTE(review): imutils.resize with neither width nor height returns the
    # image unchanged — was a target width intended here? Verify.
    cv2.imshow("Input", imutils.resize(image))
    cv2.imshow("Output", imutils.resize(aligned))
    cv2.waitKey(0)
    return results