# example.py (forked from JaidedAI/EasyOCR)
from PIL import Image
import easyocr
import onnxruntime
import torch
import numpy as np
from easyocr.imgproc import resize_aspect_ratio
import cv2
import math
import warnings
warnings.filterwarnings('ignore')
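
# What follows is a sketch of EasyOCR's CRAFT detection pipeline run through
# ONNX Runtime instead of PyTorch. The helpers below mirror the box
# post-processing in easyocr's craft_utils; the model file
# "detector_craft.onnx" is assumed to be an ONNX export of the CRAFT detector
# sitting in the working directory.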


def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text, estimate_num_chars=False):
    # prepare data
    linkmap = linkmap.copy()
    textmap = textmap.copy()
    img_h, img_w = textmap.shape

    # labeling method
    _, text_score = cv2.threshold(textmap, low_text, 1, 0)
    _, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)

    text_score_comb = np.clip(text_score + link_score, 0, 1)
    nLabels, labels, stats, _ = cv2.connectedComponentsWithStats(
        text_score_comb.astype(np.uint8), connectivity=4)

    det = []
    for k in range(1, nLabels):
        # size filtering
        size = stats[k, cv2.CC_STAT_AREA]
        if size < 10:
            continue

        # thresholding
        if np.max(textmap[labels == k]) < text_threshold:
            continue

        # make segmentation map
        segmap = np.zeros(textmap.shape, dtype=np.uint8)
        segmap[labels == k] = 255
        segmap[np.logical_and(link_score == 1, text_score == 0)] = 0  # remove link area
        x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
        w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
        niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
        sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
        # boundary check
        sx, sy = max(sx, 0), max(sy, 0)
        ex, ey = min(ex, img_w), min(ey, img_h)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
        segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)

        # make box
        np_contours = np.roll(np.array(np.where(segmap != 0)), 1, axis=0).transpose().reshape(-1, 2)
        rectangle = cv2.minAreaRect(np_contours)
        box = cv2.boxPoints(rectangle)

        # align diamond-shape
        w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
        box_ratio = max(w, h) / (min(w, h) + 1e-5)
        if abs(1 - box_ratio) <= 0.1:
            l, r = min(np_contours[:, 0]), max(np_contours[:, 0])
            t, b = min(np_contours[:, 1]), max(np_contours[:, 1])
            box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)

        # make clock-wise order
        startidx = box.sum(axis=1).argmin()
        box = np.roll(box, 4 - startidx, 0)
        det.append(box)

    return det


def to_numpy(tensor):
    if tensor.requires_grad:
        return tensor.detach().cpu().numpy()
    return tensor.cpu().numpy()


def normalize(in_img, mean=(0.485, 0.456, 0.406), variance=(0.229, 0.224, 0.225)):
    # should be RGB order
    img = in_img.copy().astype(np.float32)
    img -= np.array([mean[0] * 255.0, mean[1] * 255.0, mean[2] * 255.0], dtype=np.float32)
    img /= np.array([variance[0] * 255.0, variance[1] * 255.0, variance[2] * 255.0], dtype=np.float32)
    return img


def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
    # ratio_net=2: the CRAFT heatmaps are at half the network input resolution.
    if len(polys) > 0:
        polys = np.array(polys)
        for k in range(len(polys)):
            if polys[k] is not None:
                polys[k] *= (ratio_w * ratio_net, ratio_h * ratio_net)
    return polys


reader = easyocr.Reader(["en"], gpu=False)

# Reference run through the stock PyTorch pipeline, for comparison.
# result = reader.readtext("examples/screen1.png")
# print(result)
result = reader.readtext("examples/japanese.jpg")
# print(result)

# Load the test image as an RGB numpy array for the ONNX path.
image = np.array(Image.open("examples/japanese.jpg").convert("RGB"))

# Resize so the longer side is at most 2560 px (keeping aspect ratio) and
# remember the ratio so detected boxes can be mapped back to the original image.
img_resized, target_ratio, size_heatmap = resize_aspect_ratio(
    image, 2560, interpolation=cv2.INTER_LINEAR, mag_ratio=1.0)
ratio_h = ratio_w = 1 / target_ratio

x = normalize(img_resized)[None, ...]
x = torch.from_numpy(x).permute(0, 3, 1, 2)  # [b, h, w, c] -> [b, c, h, w]

# Explicit providers avoids the provider warning on newer onnxruntime releases.
ort_session = onnxruntime.InferenceSession(
    "detector_craft.onnx", providers=["CPUExecutionProvider"])
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
y, feature = ort_session.run(None, ort_inputs)
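
# y has shape [batch, h/2, w/2, 2]: channel 0 is the character-region score
# map and channel 1 the affinity (link) score map, per the CRAFT paper.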

boxes_list = []
for out in y:
    # make score and link map
    score_text = out[:, :, 0]
    score_link = out[:, :, 1]

    # post-processing
    boxes = getDetBoxes_core(score_text, score_link, 0.7, 0.4, 0.4, False)

    # coordinate adjustment
    boxes = adjustResultCoordinates(boxes, ratio_w, ratio_h)
    boxes_list.append(boxes)

result = []
for polys in boxes_list:
    single_img_result = []
    for box in polys:
        poly = np.array(box).astype(np.int32).reshape((-1))
        single_img_result.append(poly)
    result.append(single_img_result)
print(result)
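
# Optional: visualize the detections. A minimal sketch, not part of the
# original example; the output path "examples/japanese_boxes.jpg" is made up.
# Each entry of result[0] is a flat int32 array of 8 values: four (x, y) corners.
vis = image.copy()
for poly in result[0]:
    pts = poly.reshape((-1, 1, 2))  # (4, 1, 2), the point shape cv2.polylines expects
    cv2.polylines(vis, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
cv2.imwrite("examples/japanese_boxes.jpg", cv2.cvtColor(vis, cv2.COLOR_RGB2BGR))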