Skip to content

Commit

Permalink
Add support for workflow_ocr_backend (#291)
Browse files Browse the repository at this point in the history
* Add support for workflow_ocr_backend

* Integration for https://github.com/R0Wi-DEV/workflow_ocr_backend
* Implements #51

* Add full integration test for OcrBackend Service

* Add full integrationtests to pipeline (#294)

* Incorporate code review feedback

* Line ending adjustments
* Add heartbeat check in System Setup Check
* Add additional tests

* Add local CLI tests (#296)
  • Loading branch information
R0Wi authored Feb 12, 2025
1 parent 77a20f0 commit 2808965
Show file tree
Hide file tree
Showing 58 changed files with 5,049 additions and 202 deletions.
138 changes: 138 additions & 0 deletions .github/workflows/phpunit-integration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Integration test pipeline for the workflow_ocr app.
# Installs a Nextcloud server checkout with the app enabled and runs
# `make php-integrationtest` once per OCR backend ('remote' and 'local').
name: PHPUnit-Integration

on:
  pull_request:
  push:
    branches:
      - master
      - stable*

env:
  APP_NAME: workflow_ocr
  NEXTCLOUD_PORT: 8080
  NEXTCLOUD_USER: "admin"
  NEXTCLOUD_PASS: "password"
  # Must match NC_HAPROXY_PASSWORD of the docker-socket-proxy service below
  NC_HAPROXY_PASSWORD: "some_secure_password"
  NC_HAPROXY_PORT: 2375
  DB_PORT: 4444
  # Must match MYSQL_ROOT_PASSWORD of the mysql service below
  MYSQL_ROOT_PASSWORD: "rootpassword"

jobs:
  # Do not change this name, it is used in the integration tests
  github-php-integrationtests:
    runs-on: ubuntu-24.04
    services:
      # Docker socket proxy, registered as AppApi deploy daemon in the 'remote' run
      docker-socket-proxy:
        image: ghcr.io/nextcloud/nextcloud-appapi-dsp:release
        env:
          NC_HAPROXY_PASSWORD: "some_secure_password"
        options: --privileged
        volumes:
          - /var/run/docker.sock:/var/run/docker.sock
        ports:
          # Quoted so the mapping stays a string under any YAML implicit typing
          - "2375:2375"
      mysql:
        image: mariadb:10.5
        ports:
          # Host port equals env.DB_PORT
          - "4444:3306/tcp"
        env:
          MYSQL_ROOT_PASSWORD: rootpassword
        options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 5

    strategy:
      fail-fast: false
      matrix:
        php-versions: ['8.3']
        databases: ['mysql']
        server-versions: ['stable31']
        backend: ['remote', 'local'] # Do not change these names, they're used in the integration tests

    name: php-integrationtests-${{ matrix.backend }}-${{ matrix.php-versions }}-${{ matrix.databases }}

    steps:
      - name: Checkout server
        uses: actions/checkout@v4
        with:
          repository: nextcloud/server
          ref: ${{ matrix.server-versions }}

      - name: Checkout submodules
        shell: bash
        run: |
          auth_header="$(git config --local --get http.https://github.com/.extraheader)"
          git submodule sync --recursive
          git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1

      - name: Checkout app
        uses: actions/checkout@v4
        with:
          path: apps/${{ env.APP_NAME }}

      - name: Set up php ${{ matrix.php-versions }}
        uses: shivammathur/setup-php@v2
        with:
          php-version: ${{ matrix.php-versions }}
          tools: phpunit
          extensions: mbstring, iconv, fileinfo, intl, sqlite, pdo_sqlite, gd, zip, imagick
          coverage: none

      # The OCRmyPDF CLI is only installed for the 'local' backend run
      - name: Install ocrmypdf
        if: matrix.backend == 'local'
        run: |
          sudo apt-get update && sudo apt-get install -y ocrmypdf
          ocrmypdf --version

      - name: Install composer dependencies
        working-directory: apps/${{ env.APP_NAME }}
        run: composer i

      # Note: ./occ maintenance:mimetype:update-db is required to avoid
      # issues with the application/pdf mimetype
      - name: Set up Nextcloud
        run: |
          mkdir data
          ./occ maintenance:install --verbose --database=${{ matrix.databases }} --database-name=nextcloud \
            --database-host=127.0.0.1 --database-port=${{ env.DB_PORT }} --database-user=root --database-pass=${{ env.MYSQL_ROOT_PASSWORD }} \
            --admin-user ${{ env.NEXTCLOUD_USER }} --admin-pass ${{ env.NEXTCLOUD_PASS }}
          ./occ app:enable ${{ env.APP_NAME }}
          ./occ maintenance:mimetype:update-db
          php -S localhost:${{ env.NEXTCLOUD_PORT }} &

      - name: Checkout AppApi
        uses: actions/checkout@v4
        if: matrix.backend == 'remote'
        with:
          repository: nextcloud/app_api
          ref: ${{ matrix.server-versions }}
          path: apps/app_api

      - name: Set up AppApi/ExApp infrastructure
        if: matrix.backend == 'remote'
        run: |
          ./occ app:enable app_api
          ./occ app_api:daemon:register local_docker "docker-socket-proxy" \
            "docker-install" "http" "localhost:${{ env.NC_HAPROXY_PORT }}" "http://localhost:${{ env.NEXTCLOUD_PORT }}" \
            --set-default --haproxy_password="${{ env.NC_HAPROXY_PASSWORD }}"
          ./occ app_api:app:register workflow_ocr_backend \
            --wait-finish \
            --info-xml https://raw.githubusercontent.com/R0Wi-DEV/workflow_ocr_backend/refs/heads/${{ matrix.server-versions }}/appinfo/info.xml

      - name: PHPUnit
        working-directory: apps/${{ env.APP_NAME }}
        env:
          GITHUB_MATRIX_BACKEND: ${{ matrix.backend }}
        run: make php-integrationtest

      # `docker logs` replays the container's stderr stream on stderr, so merge
      # it into stdout to get the complete output into the log file.
      - name: Write OCR Backend logs to file
        if: failure() && matrix.backend == 'remote'
        run: |
          docker logs nc_app_workflow_ocr_backend > data/ocr_backend.log 2>&1

      # upload-artifact@v4 refuses to upload a second artifact with an already
      # used name within the same run, so the name must be unique per matrix job.
      - name: Upload logs
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: logs-${{ matrix.backend }}
          path: data/*.log


80 changes: 49 additions & 31 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,37 +12,40 @@
## Table of contents

- [Nextcloud Workflow OCR app](#nextcloud-workflow-ocr-app)
- [Table of contents](#table-of-contents)
- [Setup](#setup)
- [App installation](#app-installation)
- [Nextcloud background jobs](#nextcloud-background-jobs)
- [Backend](#backend)
- [Usage](#usage)
- [Useful triggers](#useful-triggers)
- [Trigger OCR if file was created or updated](#trigger-ocr-if-file-was-created-or-updated)
- [Trigger OCR on tag assigning](#trigger-ocr-on-tag-assigning)
- [Settings](#settings)
- [Per workflow settings](#per-workflow-settings)
- [Global settings](#global-settings)
- [Testing your configuration](#testing-your-configuration)
- [Get feedback via Notifications](#get-feedback-via-notifications)
- [How it works](#how-it-works)
- [General](#general)
- [PDF](#pdf)
- [Images](#images)
- [Troubleshooting](#troubleshooting)
- [Generic troubleshooting guide](#generic-troubleshooting-guide)
- [The Nextcloud Workflowengine](#the-nextcloud-workflowengine)
- [Development](#development)
- [Dev setup](#dev-setup)
- [Debugging](#debugging)
- [`docker`-based setup](#docker-based-setup)
- [Executing tests](#executing-tests)
- [Adding a new `OcrProcessor`](#adding-a-new-ocrprocessor)
- [Events emitted by the app](#events-emitted-by-the-app)
- [`TextRecognizedEvent`](#textrecognizedevent)
- [Limitations](#limitations)
- [Used libraries \& components](#used-libraries--components)
- [Table of contents](#table-of-contents)
- [Setup](#setup)
- [App installation](#app-installation)
- [Nextcloud background jobs](#nextcloud-background-jobs)
- [Backend](#backend)
- [Local installation](#local-installation)
- [`workflow_ocr_backend` installation](#workflow_ocr_backend-installation)
- [Setup Checks](#setup-checks)
- [Usage](#usage)
- [Useful triggers](#useful-triggers)
- [Trigger OCR if file was created or updated](#trigger-ocr-if-file-was-created-or-updated)
- [Trigger OCR on tag assigning](#trigger-ocr-on-tag-assigning)
- [Settings](#settings)
- [Per workflow settings](#per-workflow-settings)
- [Global settings](#global-settings)
- [Testing your configuration](#testing-your-configuration)
- [Get feedback via Notifications](#get-feedback-via-notifications)
- [How it works](#how-it-works)
- [General](#general)
- [PDF](#pdf)
- [Images](#images)
- [Troubleshooting](#troubleshooting)
- [Generic troubleshooting guide](#generic-troubleshooting-guide)
- [The Nextcloud Workflowengine](#the-nextcloud-workflowengine)
- [Development](#development)
- [Dev setup](#dev-setup)
- [Debugging](#debugging)
- [`docker`-based setup](#docker-based-setup)
- [Executing tests](#executing-tests)
- [Adding a new `OcrProcessor`](#adding-a-new-ocrprocessor)
- [Events emitted by the app](#events-emitted-by-the-app)
- [`TextRecognizedEvent`](#textrecognizedevent)
- [Limitations](#limitations)
- [Used libraries \& components](#used-libraries--components)

## Setup
### App installation
Expand All @@ -58,6 +61,11 @@ Since the actual processing of the files is done asynchronously via Nextcloud's


### Backend

This app is based on `ocrmypdf`. You can either install the CLI directly on the server running Nextcloud or use the alternative backend setup via Docker.

#### Local installation

> :warning: Since `v1.20.1` you'll have to install `OCRmyPDF`.
In the backend [`OCRmyPDF`](https://github.com/jbarlow83/OCRmyPDF) is used for processing PDF files. Make sure you have this command-line tool installed in the appropriate version (see 'Used libraries & components' below).
Expand All @@ -81,6 +89,16 @@ apt-get install tesseract-ocr-deu
apt-get install tesseract-ocr-chi-sim
```

#### `workflow_ocr_backend` installation

Starting from version 30, Nextcloud added support for [AppApi](https://docs.nextcloud.com/server/latest/admin_manual/exapps_management/AppAPIAndExternalApps.html) apps. In essence, this allows external container-based applications to be integrated into the Nextcloud ecosystem. This app uses this feature to provide an alternative backend setup via Docker.

If everything is set up properly, you can just install the `workflow_ocr_backend` app from the [appstore](https://apps.nextcloud.com/apps/workflow_ocr_backend).

Please refer to **https://github.com/R0Wi-DEV/workflow_ocr_backend** for more information on how to set up the backend.

> :information_source: If the `workflow_ocr_backend` External App is installed, this "frontend" app will automatically use it as the backend even if you installed `ocrmypdf` locally.
### Setup Checks

The app will perform some [Setup Checks](https://docs.nextcloud.com/server/latest/admin_manual/configuration_server/security_setup_warnings.html) to verify your installation. If there is any problem with your backend setup, you'll see an error printed in Nextcloud under `Administration Settings` → `Overview` → `Security & setup warnings`.
Expand Down
11 changes: 11 additions & 0 deletions lib/AppInfo/Application.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@
use OCA\WorkflowOcr\Helper\SidecarFileAccessor;
use OCA\WorkflowOcr\Listener\RegisterFlowOperationsListener;
use OCA\WorkflowOcr\Notification\Notifier;
use OCA\WorkflowOcr\OcrProcessors\CommandLineUtils;
use OCA\WorkflowOcr\OcrProcessors\ICommandLineUtils;
use OCA\WorkflowOcr\OcrProcessors\IOcrProcessorFactory;
use OCA\WorkflowOcr\OcrProcessors\OcrProcessorFactory;
use OCA\WorkflowOcr\OcrProcessors\Remote\Client\ApiClient;
use OCA\WorkflowOcr\OcrProcessors\Remote\Client\IApiClient;
use OCA\WorkflowOcr\Service\EventService;
use OCA\WorkflowOcr\Service\GlobalSettingsService;
use OCA\WorkflowOcr\Service\IEventService;
Expand All @@ -46,8 +50,10 @@
use OCA\WorkflowOcr\Service\OcrBackendInfoService;
use OCA\WorkflowOcr\Service\OcrService;
use OCA\WorkflowOcr\SetupChecks\OcrMyPdfCheck;
use OCA\WorkflowOcr\Wrapper\AppApiWrapper;
use OCA\WorkflowOcr\Wrapper\CommandWrapper;
use OCA\WorkflowOcr\Wrapper\Filesystem;
use OCA\WorkflowOcr\Wrapper\IAppApiWrapper;
use OCA\WorkflowOcr\Wrapper\ICommand;
use OCA\WorkflowOcr\Wrapper\IFilesystem;
use OCA\WorkflowOcr\Wrapper\IViewFactory;
Expand All @@ -63,6 +69,8 @@

class Application extends App implements IBootstrap {
public const APP_NAME = 'workflow_ocr';
public const APP_BACKEND_NAME = 'workflow_ocr_backend';
public const APP_API_APP_NAME = 'app_api';

/**
* Application constructor.
Expand All @@ -83,6 +91,9 @@ public function register(IRegistrationContext $context): void {
$context->registerServiceAlias(IEventService::class, EventService::class);
$context->registerServiceAlias(IOcrBackendInfoService::class, OcrBackendInfoService::class);
$context->registerServiceAlias(INotificationService::class, NotificationService::class);
$context->registerServiceAlias(IApiClient::class, ApiClient::class);
$context->registerServiceAlias(ICommandLineUtils::class, CommandLineUtils::class);
$context->registerServiceAlias(IAppApiWrapper::class, AppApiWrapper::class);

// BUG #43
$context->registerService(ICommand::class, function () {
Expand Down
4 changes: 2 additions & 2 deletions lib/Exception/OcrProcessorNotFoundException.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
use Exception;

/**
 * Thrown when no OCR processor is available for a given mime type.
 */
class OcrProcessorNotFoundException extends Exception {
	/**
	 * @param string $mimeType         Mime type for which no OCR processor was found
	 * @param bool   $useRemoteBackend Whether the remote backend was selected for the lookup
	 */
	public function __construct(string $mimeType, bool $useRemoteBackend) {
		// Concatenating a bool directly yields '1' for true and '' for false,
		// so render it explicitly to keep the message readable in both cases.
		$remoteBackendFlag = $useRemoteBackend ? 'true' : 'false';
		parent::__construct('OCR processor for mime type ' . $mimeType . '(useRemoteBackend=' . $remoteBackendFlag . ') not found');
	}
}
2 changes: 2 additions & 0 deletions lib/Helper/SidecarFileAccessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ public function getOrCreateSidecarFile() {
$this->sidecarFilePath = $this->tempManager->getTemporaryFile('sidecar');
if (!$this->sidecarFilePath) {
$this->logger->warning('Could not create temporary sidecar file');
} elseif (!is_writable($this->sidecarFilePath)) {
$this->logger->warning('Temporary sidecar file is not writable');
}
}
return $this->sidecarFilePath;
Expand Down
2 changes: 1 addition & 1 deletion lib/Model/WorkflowSettings.php
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ private function setJson(?string $json = null) {
$this->setProperty($this->customCliArgs, $data, 'customCliArgs', fn ($value) => is_string($value));
}

private function setProperty(& $property, array $jsonData, string $key, ?callable $dataCheck = null): void {
private function setProperty(array|bool|int|string & $property, array $jsonData, string $key, ?callable $dataCheck = null): void {
if (array_key_exists($key, $jsonData) && ($dataCheck === null || $dataCheck($jsonData[$key]))) {
$property = $jsonData[$key];
}
Expand Down
Loading

0 comments on commit 2808965

Please sign in to comment.