-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_utils.py
89 lines (77 loc) · 3.02 KB
/
ocr_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import cv2
import numpy as np
from PIL import Image
# Point Tesseract at the `tessdata` directory located next to this module.
# The environment variable is set here, before `pytesseract` is imported
# below; keep this ordering when touching the import section.
# NOTE(review): presumably this directory holds the `digits_comma`
# traineddata requested in extract_data -- verify it is shipped with the repo.
tessdata_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'tessdata'))
os.environ['TESSDATA_PREFIX'] = tessdata_dir
import pytesseract
def load_image(image, temp_dir='./tmp', min_width=500, dpi=300, scale=4):
    """Upscale an image and round-trip it through a PNG that carries DPI info.

    Args
        :image: Path to an image file, a numpy array in BGR channel order
            (it is converted with cv2.COLOR_BGR2RGB), or any other object,
            which is used as-is and is expected to behave like a PIL image.
        :temp_dir: Directory for the intermediate ``digit_ocr.png`` file.
            Created (including parents) when it does not exist.
        :min_width: Target minimum width in pixels. Only consulted when
            ``scale`` is None.
        :dpi: DPI stored in the PNG; a single number or an (x, y) pair.
        :scale: Factor applied to both width and height. The default of 4
            matches the fixed factor this function has always used. Pass
            None to upscale only images that are not wider than
            ``min_width``, so that they reach ``min_width``.
    Returns
        :dpi_image: PIL image re-opened from the saved PNG, fully loaded
            into memory.
    """
    if isinstance(image, str):
        image = Image.open(image)
    elif isinstance(image, np.ndarray):
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    w, h = image.size
    if scale is None:
        scale = 1 if w > min_width else min_width / w
    new_size = (int(w * scale), int(h * scale))
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    image = image.resize(new_size, Image.LANCZOS)

    # makedirs copes with nested paths and with the directory appearing
    # between the check and the creation.
    os.makedirs(temp_dir, exist_ok=True)
    filename = os.path.join(temp_dir, 'digit_ocr.png')
    dpi = dpi if isinstance(dpi, (tuple, list)) else (dpi, dpi)
    image.save(filename, dpi=dpi)

    dpi_image = Image.open(filename)
    # Image.open is lazy. Read the pixels now so that a later call, which
    # overwrites the same temp file, cannot corrupt this image, and so the
    # file handle is released.
    dpi_image.load()
    return dpi_image
def extract_data(image, conf_thresh=80, col_name=None, debug=False):
    """Run Tesseract on an image and return the confidently detected texts.

    The image is resized via load_image, binarised with an adaptive
    threshold, cleaned of long horizontal lines, smoothed with a few
    erode/dilate passes and then passed to pytesseract with the
    ``digits_comma`` language and page segmentation mode 6.

    Args
        :image: Anything load_image accepts (file path, BGR numpy array, ...).
        :conf_thresh: Only entries whose confidence is strictly greater than
            this value are returned.
        :col_name: Suffix for the debug image file names (the original note
            says "Bid or Ask column"). None means no suffix.
        :debug: When true, write ``thresh*.png``, ``and*.png`` and
            ``det*.png`` to the current working directory.
    Returns
        :results: A list of ``(x1, y1, x2, y2, text, conf)`` tuples. The
            coordinates are in the pixel space of the resized image returned
            by load_image, not of the original input.
    """
    image = load_image(image)
    # Force three channels: a grayscale or palette source would otherwise
    # yield a 2-D array and make the RGB2GRAY conversion below fail.
    rgb = np.array(image.convert('RGB'))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)

    # Opening the inverted image with a 1 x (40% of width) kernel keeps only
    # long horizontal runs of dark pixels (presumably table rulings).
    h_kernel = np.ones((1, int(rgb.shape[1] * 0.4)))
    detected_lines = cv2.morphologyEx(
        cv2.bitwise_not(thresh), cv2.MORPH_OPEN, h_kernel, iterations=1)
    # Adding those runs back onto the thresholded image turns them white,
    # i.e. removes them from what Tesseract sees.
    and_thresh = thresh + detected_lines
    and_thresh = cv2.erode(and_thresh, np.ones((3, 3)), iterations=1)
    and_thresh = cv2.dilate(and_thresh, np.ones((5, 5)), iterations=1)
    and_thresh = cv2.erode(and_thresh, np.ones((3, 3)), iterations=1)

    if debug:
        if col_name is None:
            col_name = ''
        cv2.imwrite(f'thresh{col_name}.png', thresh)
        cv2.imwrite(f'and{col_name}.png', and_thresh)
        cv2.imwrite(f'det{col_name}.png', detected_lines)

    data = pytesseract.image_to_data(
        and_thresh, lang='digits_comma', config='--psm 6',
        output_type=pytesseract.Output.DICT)

    results = []
    entries = zip(data['left'], data['top'], data['width'], data['height'],
                  data['text'], data['conf'])
    for left, top, width, height, text, conf in entries:
        # conf is passed through float() before comparing, which also copes
        # with a string value; the original value is what gets returned.
        if float(conf) > conf_thresh:
            x1, y1 = int(left), int(top)
            results.append((x1, y1, x1 + int(width), y1 + int(height), text, conf))
    return results
def draw_results(image, results):
    """Draw detected text boxes and their text on a BGR copy of an image.

    Args
        :image: numpy array treated as RGB; it is converted with
            cv2.COLOR_RGB2BGR, and the drawing happens on the converted
            array rather than on the input.
        :results: Iterable of ``(x1, y1, x2, y2, text, conf)`` tuples.
    Returns
        :image: The annotated image in BGR channel order.
    """
    green = (0, 255, 0)
    canvas = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    for x1, y1, x2, y2, text, _conf in results:
        top_left = (x1, y1)
        bottom_right = (x2, y2)
        cv2.rectangle(canvas, top_left, bottom_right, green, 2)
        # The label is anchored at the bottom-left corner of the box.
        cv2.putText(canvas, text, (x1, y2), cv2.FONT_HERSHEY_SIMPLEX, 0.7, green, 2)
    return canvas