diff --git a/doctr/datasets/imgur5k.py b/doctr/datasets/imgur5k.py
index 3e7cf0e07..ce70c9f3b 100644
--- a/doctr/datasets/imgur5k.py
+++ b/doctr/datasets/imgur5k.py
@@ -112,7 +112,7 @@ def __init__(
                 if ann["word"] != "."
             ]
             # (x, y) coordinates of top left, top right, bottom right, bottom left corners
-            box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes]
+            box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes]  # type: ignore[arg-type]
 
             if not use_polygons:
                 # xmin, ymin, xmax, ymax
diff --git a/doctr/models/_utils.py b/doctr/models/_utils.py
index 744087fdc..a4394363c 100644
--- a/doctr/models/_utils.py
+++ b/doctr/models/_utils.py
@@ -51,14 +51,14 @@ def estimate_orientation(img: np.ndarray, n_ct: int = 50, ratio_threshold_for_li
     if max_value <= 255 and min_value >= 0 and img.shape[-1] == 3:
         gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         gray_img = cv2.medianBlur(gray_img, 5)
-        thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+        thresh = cv2.threshold(gray_img, thresh=0, maxval=255, type=cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]  # type: ignore[assignment]
 
     # try to merge words in lines
     (h, w) = img.shape[:2]
     k_x = max(1, (floor(w / 100)))
     k_y = max(1, (floor(h / 100)))
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (k_x, k_y))
-    thresh = cv2.dilate(thresh, kernel, iterations=1)
+    thresh = cv2.dilate(thresh, kernel, iterations=1)  # type: ignore[assignment]
 
     # extract contours
     contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
diff --git a/doctr/models/detection/core.py b/doctr/models/detection/core.py
index 970310111..63fa78615 100644
--- a/doctr/models/detection/core.py
+++ b/doctr/models/detection/core.py
@@ -57,7 +57,7 @@ def box_score(pred: np.ndarray, points: np.ndarray, assume_straight_pages: bool
 
     else:
         mask: np.ndarray = np.zeros((h, w), np.int32)
-        cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)
+        cv2.fillPoly(mask, [points.astype(np.int32)], 1.0)  # type: ignore[call-overload]
         product = pred * mask
         return np.sum(product) / np.count_nonzero(product)
 
diff --git a/doctr/models/detection/differentiable_binarization/base.py b/doctr/models/detection/differentiable_binarization/base.py
index 6d64c72d4..acb0bb314 100644
--- a/doctr/models/detection/differentiable_binarization/base.py
+++ b/doctr/models/detection/differentiable_binarization/base.py
@@ -83,7 +83,7 @@ def polygon_to_box(
         if len(expanded_points) < 1:
             return None  # type: ignore[return-value]
         return (
-            cv2.boundingRect(expanded_points)
+            cv2.boundingRect(expanded_points)  # type: ignore[return-value]
             if self.assume_straight_pages
             else np.roll(cv2.boxPoints(cv2.minAreaRect(expanded_points)), -1, axis=0)
         )
@@ -233,7 +233,7 @@ def draw_thresh_map(
         padded_polygon: np.ndarray = np.array(padding.Execute(distance)[0])
 
         # Fill the mask with 1 on the new padded polygon
-        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)  # type: ignore[call-overload]
 
         # Get min/max to recover polygon after distance computation
         xmin = padded_polygon[:, 0].min()
@@ -351,7 +351,7 @@ def build_target(
                     # seg_mask[idx, box[1] : box[3] + 1, box[0] : box[2] + 1, class_idx] = False
                     seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
                     continue
-                cv2.fillPoly(seg_target[idx, class_idx], [shrinked.astype(np.int32)], 1)
+                cv2.fillPoly(seg_target[idx, class_idx], [shrinked.astype(np.int32)], 1.0)  # type: ignore[call-overload]
 
                 # Draw on both thresh map and thresh mask
                 poly, thresh_target[idx, class_idx], thresh_mask[idx, class_idx] = self.draw_thresh_map(
diff --git a/doctr/models/detection/linknet/base.py b/doctr/models/detection/linknet/base.py
index d488cc94a..8e60d06d4 100644
--- a/doctr/models/detection/linknet/base.py
+++ b/doctr/models/detection/linknet/base.py
@@ -81,7 +81,7 @@ def polygon_to_box(
         if len(expanded_points) < 1:
            return None  # type: ignore[return-value]
         return (
-            cv2.boundingRect(expanded_points)
+            cv2.boundingRect(expanded_points)  # type: ignore[return-value]
             if self.assume_straight_pages
             else np.roll(cv2.boxPoints(cv2.minAreaRect(expanded_points)), -1, axis=0)
         )
@@ -246,7 +246,7 @@ def build_target(
                 if shrunken.shape[0] <= 2 or not Polygon(shrunken).is_valid:
                     seg_mask[idx, class_idx, box[1] : box[3] + 1, box[0] : box[2] + 1] = False
                     continue
-                cv2.fillPoly(seg_target[idx, class_idx], [shrunken.astype(np.int32)], 1)
+                cv2.fillPoly(seg_target[idx, class_idx], [shrunken.astype(np.int32)], 1.0)  # type: ignore[call-overload]
 
         # Don't forget to switch back to channel last if Tensorflow is used
         if channels_last:
diff --git a/doctr/transforms/functional/base.py b/doctr/transforms/functional/base.py
index 205e245f8..9084989b3 100644
--- a/doctr/transforms/functional/base.py
+++ b/doctr/transforms/functional/base.py
@@ -200,4 +200,4 @@ def create_shadow_mask(
     mask: np.ndarray = np.zeros((*target_shape, 1), dtype=np.uint8)
     mask = cv2.fillPoly(mask, [final_contour], (255,), lineType=cv2.LINE_AA)[..., 0]
 
-    return (mask / 255).astype(np.float32).clip(0, 1) * intensity_mask.astype(np.float32)
+    return (mask / 255).astype(np.float32).clip(0, 1) * intensity_mask.astype(np.float32)  # type: ignore[operator]
diff --git a/doctr/utils/geometry.py b/doctr/utils/geometry.py
index 6f99bb107..17b04a739 100644
--- a/doctr/utils/geometry.py
+++ b/doctr/utils/geometry.py
@@ -102,7 +102,7 @@ def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024
     # Convert to absolute for minAreaRect
     cloud *= intermed_size
     rect = cv2.minAreaRect(cloud.astype(np.int32))
-    return cv2.boxPoints(rect) / intermed_size
+    return cv2.boxPoints(rect) / intermed_size  # type: ignore[operator]
 
 
 def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
diff --git a/doctr/utils/metrics.py b/doctr/utils/metrics.py
index 96a021393..99054667b 100644
--- a/doctr/utils/metrics.py
+++ b/doctr/utils/metrics.py
@@ -274,7 +274,7 @@ def _rbox_to_mask(box: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
     else:
         abs_box = box
     abs_box[2:] = abs_box[2:] + 1
-    cv2.fillPoly(mask, [abs_box - 1], 1)
+    cv2.fillPoly(mask, [abs_box - 1], 1.0)  # type: ignore[call-overload]
 
     return mask.astype(bool)
 
@@ -306,7 +306,7 @@ def rbox_to_mask(boxes: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
 
         # TODO: optimize slicing to improve vectorization
         for idx, _box in enumerate(abs_boxes):
-            cv2.fillPoly(masks[idx], [_box - 1], 1)
+            cv2.fillPoly(masks[idx], [_box - 1], 1.0)  # type: ignore[call-overload]
 
     return masks.astype(bool)
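
The recurring change above is an error-code-scoped "# type: ignore[...]" on cv2 calls, presumably to silence mypy errors raised by the OpenCV type stubs (for fillPoly, the stubs appear to expect a sequence for the color argument, while a bare scalar is accepted at runtime). A minimal standalone sketch of the pattern, with hypothetical mask and polygon values not taken from the patch:

    import cv2
    import numpy as np

    # Hypothetical inputs: a small int32 mask and one triangle in pixel coordinates.
    mask = np.zeros((32, 32), dtype=np.int32)
    poly = np.array([[4, 4], [28, 4], [16, 28]], dtype=np.int32)

    # OpenCV takes a scalar fill value at runtime; the narrow error code silences
    # only the stub's overload mismatch and keeps every other mypy check active.
    cv2.fillPoly(mask, [poly], 1.0)  # type: ignore[call-overload]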