diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml new file mode 100644 index 0000000..cf14478 --- /dev/null +++ b/.github/workflows/docker.yml @@ -0,0 +1,83 @@ +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +# +name: Docker image on ghcr.io + +on: + push: + tags: + - 'v*' + pull_request: + branches: main + schedule: + - cron: '0 2 1 6 *' # At 02:00 on day-of-month 1 in June (once a year actually) + +env: + REGISTRY: ghcr.io + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + # Must match version at https://www.python.org/ftp/python/ + python: ["3.10.13"] + system: ["cpu", "gpu"] + + # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. + permissions: + contents: read + packages: write + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ github.repository }} + tags: | + # used only on schedule event + type=schedule,pattern={{date 'YYYY-MM'}},prefix=OnnxTR-${{ matrix.system }}-py${{ matrix.python }}- + # used only if a tag following semver is published + type=semver,pattern={{raw}},prefix=OnnxTR-${{ matrix.system }}-py${{ matrix.python }}- + + - name: Build Docker image + id: build + uses: docker/build-push-action@v5 + with: + context: . 
+          build-args: |
+            PYTHON_VERSION=${{ matrix.python }}
+            SYSTEM=${{ matrix.system }}
+            ONNXTR_REPO=${{ github.repository }}
+            ONNXTR_VERSION=${{ github.sha }}
+          push: false  # push only if `import onnxtr` works
+          tags: ${{ steps.meta.outputs.tags }}
+
+      - name: Check if `import onnxtr` works
+        run: docker run ${{ steps.build.outputs.imageid }} python3 -c 'import onnxtr; print(onnxtr.__version__)'
+
+      - name: Push Docker image
+        # Push only if the CI is not triggered by "PR on main".
+        # NOTE(review): the previous guard also required github.ref == 'refs/heads/main',
+        # but this workflow's only `push` trigger is tags 'v*' (ref = refs/tags/v*), so
+        # semver-tagged release images were never pushed. Excluding pull_request is the
+        # correct condition: it pushes on tag pushes and on the scheduled run, skips PRs.
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          build-args: |
+            PYTHON_VERSION=${{ matrix.python }}
+            SYSTEM=${{ matrix.system }}
+            ONNXTR_REPO=${{ github.repository }}
+            ONNXTR_VERSION=${{ github.sha }}
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..a4ff3c5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,76 @@
+FROM ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV LANG=C.UTF-8
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONDONTWRITEBYTECODE=1
+
+ARG SYSTEM=gpu
+
+# Enroll NVIDIA GPG public key and install CUDA
+RUN if [ "$SYSTEM" = "gpu" ]; then \
+    apt-get update && \
+    apt-get install -y gnupg ca-certificates wget && \
+    # - Install Nvidia repo keys
+    # - See: https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#network-repo-installation-for-ubuntu
+    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+    dpkg -i cuda-keyring_1.1-1_all.deb && \
+    apt-get update && apt-get install -y --no-install-recommends \
+    # NOTE: The following CUDA_VERSION, CUDNN_VERSION, and NVINFER_VERSION are for CUDA 11.8
+    # - this needs to match exactly with the host system otherwise the onnxruntime-gpu package isn't able to work correct. !!
+    cuda-command-line-tools-11-8 \
+    cuda-cudart-dev-11-8 \
+    cuda-nvcc-11-8 \
+    cuda-cupti-11-8 \
+    cuda-nvprune-11-8 \
+    cuda-libraries-11-8 \
+    cuda-nvrtc-11-8 \
+    libcufft-11-8 \
+    libcurand-11-8 \
+    libcusolver-11-8 \
+    libcusparse-11-8 \
+    libcublas-11-8 \
+    # - CuDNN: https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#ubuntu-network-installation
+    libcudnn8=8.6.0.163-1+cuda11.8 \
+    libnvinfer-plugin8=8.6.1.6-1+cuda11.8 \
+    libnvinfer8=8.6.1.6-1+cuda11.8; \
+fi
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    # - Other packages
+    build-essential \
+    pkg-config \
+    curl \
+    wget \
+    software-properties-common \
+    unzip \
+    git \
+    # - Packages to build Python
+    tar make gcc zlib1g-dev libffi-dev libssl-dev liblzma-dev libbz2-dev libsqlite3-dev \
+    # - Packages for docTR
+    libgl1-mesa-dev libsm6 libxext6 libxrender-dev libpangocairo-1.0-0 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+
+# Install Python
+ARG PYTHON_VERSION=3.10.13
+
+RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
+    tar -zxf Python-$PYTHON_VERSION.tgz && \
+    cd Python-$PYTHON_VERSION && \
+    mkdir /opt/python/ && \
+    ./configure --prefix=/opt/python && \
+    make && \
+    make install && \
+    cd ..
&& \ + rm Python-$PYTHON_VERSION.tgz && \ + rm -r Python-$PYTHON_VERSION + +ENV PATH=/opt/python/bin:$PATH + +# Install OnnxTR +ARG ONNXTR_REPO='felixdittrich92/onnxtr' +ARG ONNXTR_VERSION=main +RUN pip3 install -U pip setuptools wheel && \ + pip3 install "onnxtr[$SYSTEM,html]@git+https://github.com/$ONNXTR_REPO.git@$ONNXTR_VERSION" diff --git a/README.md b/README.md index aa427c3..db99701 100644 --- a/README.md +++ b/README.md @@ -227,7 +227,7 @@ The smallest combination in OnnxTR (docTR) of `db_mobilenet_v3_large` and `crnn_ |--------------------------------|-------------------------------|-------------------------------| |docTR (GPU) - v0.8.1 | ~0.07s / Page | ~0.05s / Page | |**docTR (GPU) float16** - v0.8.1| **~0.06s / Page** | **~0.03s / Page** | -|OnnxTR (GPU) - v0.1.2 | coming soon | coming soon | +|OnnxTR (GPU) - v0.1.2 | **~0.06s / Page** | ~0.04s / Page | |EasyOCR (GPU) - v1.7.1 | ~0.31s / Page | ~0.19s / Page | |Surya (GPU) float16 - v0.4.4 | ~3.70s / Page | ~2.81s / Page | diff --git a/onnxtr/models/engine.py b/onnxtr/models/engine.py index ab3d63e..dfef017 100644 --- a/onnxtr/models/engine.py +++ b/onnxtr/models/engine.py @@ -43,8 +43,8 @@ def run(self, inputs: np.ndarray) -> np.ndarray: inputs = np.broadcast_to(inputs, (self.fixed_batch_size, *inputs.shape)) # combine the results logits = np.concatenate( - [self.runtime.run(self.output_name, {"input": batch})[0] for batch in inputs], axis=0 + [self.runtime.run(self.output_name, {self.runtime_inputs.name: batch})[0] for batch in inputs], axis=0 ) else: - logits = self.runtime.run(self.output_name, {"input": inputs})[0] + logits = self.runtime.run(self.output_name, {self.runtime_inputs.name: inputs})[0] return shape_translate(logits, format="BHWC")