diff --git a/docs/source/using_doctr/using_models.rst b/docs/source/using_doctr/using_models.rst
index 6b7c9b474..850c7725d 100644
--- a/docs/source/using_doctr/using_models.rst
+++ b/docs/source/using_doctr/using_models.rst
@@ -298,6 +298,16 @@ For instance, this snippet instantiates an end-to-end ocr_predictor working with
     from doctr.models import ocr_predictor
     model = ocr_predictor('linknet_resnet18', pretrained=True, assume_straight_pages=False, preserve_aspect_ratio=True)
 
+Additionally, you can change the batch size of the underlying detection and recognition predictors to optimize the performance depending on your hardware:
+
+* `det_bs`: batch size for the detection model (default: 2)
+* `reco_bs`: batch size for the recognition model (default: 128)
+
+.. code:: python3
+
+    from doctr.models import ocr_predictor
+    model = ocr_predictor(pretrained=True, det_bs=4, reco_bs=1024)
+
 To modify the output structure you can pass the following arguments to the predictor which will be handled by the underlying `DocumentBuilder`:
 
 * `resolve_lines`: whether words should be automatically grouped into lines (default: True)
diff --git a/doctr/models/classification/zoo.py b/doctr/models/classification/zoo.py
index 7388c2235..6179ff976 100644
--- a/doctr/models/classification/zoo.py
+++ b/doctr/models/classification/zoo.py
@@ -42,7 +42,7 @@ def _crop_orientation_predictor(arch: str, pretrained: bool, **kwargs: Any) -> C
     _model = classification.__dict__[arch](pretrained=pretrained)
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 64)
+    kwargs["batch_size"] = kwargs.get("batch_size", 128)
     input_shape = _model.cfg["input_shape"][:-1] if is_tf_available() else _model.cfg["input_shape"][1:]
     predictor = CropOrientationPredictor(
         PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs), _model
diff --git a/doctr/models/detection/zoo.py b/doctr/models/detection/zoo.py
index 11be179c8..2097be2f0 100644
--- a/doctr/models/detection/zoo.py
+++ b/doctr/models/detection/zoo.py
@@ -62,7 +62,7 @@ def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True,
 
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 1)
+    kwargs["batch_size"] = kwargs.get("batch_size", 2)
     predictor = DetectionPredictor(
         PreProcessor(_model.cfg["input_shape"][:-1] if is_tf_available() else _model.cfg["input_shape"][1:], **kwargs),
         _model,
diff --git a/doctr/models/recognition/zoo.py b/doctr/models/recognition/zoo.py
index 4aae124b7..039324043 100644
--- a/doctr/models/recognition/zoo.py
+++ b/doctr/models/recognition/zoo.py
@@ -45,7 +45,7 @@ def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredict
 
     kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
     kwargs["std"] = kwargs.get("std", _model.cfg["std"])
-    kwargs["batch_size"] = kwargs.get("batch_size", 32)
+    kwargs["batch_size"] = kwargs.get("batch_size", 128)
     input_shape = _model.cfg["input_shape"][:2] if is_tf_available() else _model.cfg["input_shape"][-2:]
     predictor = RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs), _model)
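
For trying the change out locally, here is a minimal usage sketch of the documented `det_bs`/`reco_bs` arguments alongside the new defaults. It assumes the standard `doctr.io.DocumentFile` entry point; the PDF path is a placeholder, and the chosen batch sizes are only illustrative, not recommended values.

    from doctr.io import DocumentFile
    from doctr.models import ocr_predictor

    # With no overrides, the predictor picks up the new defaults
    # (batch_size=2 for detection, batch_size=128 for recognition).
    default_model = ocr_predictor(pretrained=True)

    # Raise the batch sizes on hardware with more memory headroom.
    large_batch_model = ocr_predictor(pretrained=True, det_bs=4, reco_bs=1024)

    doc = DocumentFile.from_pdf("path/to/your/doc.pdf")  # placeholder path
    result = large_batch_model(doc)
    print(result.render())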