From 285ea8522379fab0797d245bad58ee449f54f7f3 Mon Sep 17 00:00:00 2001 From: Wilco Louwerse Date: Tue, 28 Jan 2025 17:13:03 +0100 Subject: [PATCH] Added code to find files by searching file content --- appinfo/routes.php | 1 + composer.json | 7 + composer.lock | 771 ++++++++++++++++++++++++++++- lib/Controller/FilesController.php | 66 +++ lib/Service/FileService.php | 375 ++++++++++++-- lib/Service/ObjectService.php | 48 +- 6 files changed, 1181 insertions(+), 87 deletions(-) create mode 100644 lib/Controller/FilesController.php diff --git a/appinfo/routes.php b/appinfo/routes.php index 11bef22..97d5526 100644 --- a/appinfo/routes.php +++ b/appinfo/routes.php @@ -24,5 +24,6 @@ ['name' => 'objects#lock', 'url' => '/api/objects/{id}/lock', 'verb' => 'POST'], ['name' => 'objects#unlock', 'url' => '/api/objects/{id}/unlock', 'verb' => 'POST'], ['name' => 'objects#revert', 'url' => '/api/objects/{id}/revert', 'verb' => 'POST'], + ['name' => 'files#search', 'url' => '/api/files/search', 'verb' => 'GET'], ], ]; diff --git a/composer.json b/composer.json index 48c0e0e..55c5ad1 100644 --- a/composer.json +++ b/composer.json @@ -30,13 +30,20 @@ }, "require": { "php": "^8.1", + "ext-simplexml": "*", + "ext-zip": "*", "adbario/php-dot-notation": "^3.3.0", "bamarni/composer-bin-plugin": "^1.8", "elasticsearch/elasticsearch": "^v8.14.0", "guzzlehttp/guzzle": "^7.0", + "league/csv": "^9.8", "opis/json-schema": "^2.3", + "phpoffice/phpspreadsheet": "^3.9", + "phpoffice/phpword": "^1.3", + "smalot/pdfparser": "^2.11", "symfony/uid": "^6.4", "symfony/yaml": "^6.4", + "thiagoalessio/tesseract_ocr": "^2.13", "twig/twig": "^3.18" }, "require-dev": { diff --git a/composer.lock b/composer.lock index a59137f..2caa220 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "065490421925be3ad6cb1128af7a5e1c", + "content-hash": 
"e814a989db57e5a8a6248e5fb56da0dd", "packages": [ { "name": "adbario/php-dot-notation", @@ -117,6 +117,85 @@ }, "time": "2022-10-31T08:38:03+00:00" }, + { + "name": "composer/pcre", + "version": "3.3.2", + "source": { + "type": "git", + "url": "https://github.com/composer/pcre.git", + "reference": "b2bed4734f0cc156ee1fe9c0da2550420d99a21e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/composer/pcre/zipball/b2bed4734f0cc156ee1fe9c0da2550420d99a21e", + "reference": "b2bed4734f0cc156ee1fe9c0da2550420d99a21e", + "shasum": "" + }, + "require": { + "php": "^7.4 || ^8.0" + }, + "conflict": { + "phpstan/phpstan": "<1.11.10" + }, + "require-dev": { + "phpstan/phpstan": "^1.12 || ^2", + "phpstan/phpstan-strict-rules": "^1 || ^2", + "phpunit/phpunit": "^8 || ^9" + }, + "type": "library", + "extra": { + "phpstan": { + "includes": [ + "extension.neon" + ] + }, + "branch-alias": { + "dev-main": "3.x-dev" + } + }, + "autoload": { + "psr-4": { + "Composer\\Pcre\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jordi Boggiano", + "email": "j.boggiano@seld.be", + "homepage": "http://seld.be" + } + ], + "description": "PCRE wrapping library that offers type-safe preg_* replacements.", + "keywords": [ + "PCRE", + "preg", + "regex", + "regular expression" + ], + "support": { + "issues": "https://github.com/composer/pcre/issues", + "source": "https://github.com/composer/pcre/tree/3.3.2" + }, + "funding": [ + { + "url": "https://packagist.com", + "type": "custom" + }, + { + "url": "https://github.com/composer", + "type": "github" + }, + { + "url": "https://tidelift.com/funding/github/packagist/composer/composer", + "type": "tidelift" + } + ], + "time": "2024-11-12T16:29:46+00:00" + }, { "name": "elastic/transport", "version": "v8.10.0", @@ -558,6 +637,274 @@ ], "time": "2024-07-18T11:15:46+00:00" }, + { + "name": "league/csv", + "version": "9.8.0", + "source": { + "type": 
"git", + "url": "https://github.com/thephpleague/csv.git", + "reference": "9d2e0265c5d90f5dd601bc65ff717e05cec19b47" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thephpleague/csv/zipball/9d2e0265c5d90f5dd601bc65ff717e05cec19b47", + "reference": "9d2e0265c5d90f5dd601bc65ff717e05cec19b47", + "shasum": "" + }, + "require": { + "ext-json": "*", + "ext-mbstring": "*", + "php": "^7.4 || ^8.0" + }, + "require-dev": { + "ext-curl": "*", + "ext-dom": "*", + "friendsofphp/php-cs-fixer": "^v3.4.0", + "phpstan/phpstan": "^1.3.0", + "phpstan/phpstan-phpunit": "^1.0.0", + "phpstan/phpstan-strict-rules": "^1.1.0", + "phpunit/phpunit": "^9.5.11" + }, + "suggest": { + "ext-dom": "Required to use the XMLConverter and or the HTMLConverter classes", + "ext-iconv": "Needed to ease transcoding CSV using iconv stream filters" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "9.x-dev" + } + }, + "autoload": { + "files": [ + "src/functions_include.php" + ], + "psr-4": { + "League\\Csv\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Ignace Nyamagana Butera", + "email": "nyamsprod@gmail.com", + "homepage": "https://github.com/nyamsprod/", + "role": "Developer" + } + ], + "description": "CSV data manipulation made easy in PHP", + "homepage": "https://csv.thephpleague.com", + "keywords": [ + "convert", + "csv", + "export", + "filter", + "import", + "read", + "transform", + "write" + ], + "support": { + "docs": "https://csv.thephpleague.com", + "issues": "https://github.com/thephpleague/csv/issues", + "rss": "https://github.com/thephpleague/csv/releases.atom", + "source": "https://github.com/thephpleague/csv" + }, + "funding": [ + { + "url": "https://github.com/sponsors/nyamsprod", + "type": "github" + } + ], + "time": "2022-01-04T00:13:07+00:00" + }, + { + "name": "maennchen/zipstream-php", + "version": "3.1.1", + "source": { + "type": "git", + "url": 
"https://github.com/maennchen/ZipStream-PHP.git", + "reference": "6187e9cc4493da94b9b63eb2315821552015fca9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/maennchen/ZipStream-PHP/zipball/6187e9cc4493da94b9b63eb2315821552015fca9", + "reference": "6187e9cc4493da94b9b63eb2315821552015fca9", + "shasum": "" + }, + "require": { + "ext-mbstring": "*", + "ext-zlib": "*", + "php-64bit": "^8.1" + }, + "require-dev": { + "ext-zip": "*", + "friendsofphp/php-cs-fixer": "^3.16", + "guzzlehttp/guzzle": "^7.5", + "mikey179/vfsstream": "^1.6", + "php-coveralls/php-coveralls": "^2.5", + "phpunit/phpunit": "^10.0", + "vimeo/psalm": "^5.0" + }, + "suggest": { + "guzzlehttp/psr7": "^2.4", + "psr/http-message": "^2.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "ZipStream\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Paul Duncan", + "email": "pabs@pablotron.org" + }, + { + "name": "Jonatan Männchen", + "email": "jonatan@maennchen.ch" + }, + { + "name": "Jesse Donat", + "email": "donatj@gmail.com" + }, + { + "name": "András Kolesár", + "email": "kolesar@kolesar.hu" + } + ], + "description": "ZipStream is a library for dynamically streaming dynamic zip files from PHP without writing to the disk at all on the server.", + "keywords": [ + "stream", + "zip" + ], + "support": { + "issues": "https://github.com/maennchen/ZipStream-PHP/issues", + "source": "https://github.com/maennchen/ZipStream-PHP/tree/3.1.1" + }, + "funding": [ + { + "url": "https://github.com/maennchen", + "type": "github" + } + ], + "time": "2024-10-10T12:33:01+00:00" + }, + { + "name": "markbaker/complex", + "version": "3.0.2", + "source": { + "type": "git", + "url": "https://github.com/MarkBaker/PHPComplex.git", + "reference": "95c56caa1cf5c766ad6d65b6344b807c1e8405b9" + }, + "dist": { + "type": "zip", + "url": 
"https://api.github.com/repos/MarkBaker/PHPComplex/zipball/95c56caa1cf5c766ad6d65b6344b807c1e8405b9", + "reference": "95c56caa1cf5c766ad6d65b6344b807c1e8405b9", + "shasum": "" + }, + "require": { + "php": "^7.2 || ^8.0" + }, + "require-dev": { + "dealerdirect/phpcodesniffer-composer-installer": "dev-master", + "phpcompatibility/php-compatibility": "^9.3", + "phpunit/phpunit": "^7.0 || ^8.0 || ^9.0", + "squizlabs/php_codesniffer": "^3.7" + }, + "type": "library", + "autoload": { + "psr-4": { + "Complex\\": "classes/src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Mark Baker", + "email": "mark@lange.demon.co.uk" + } + ], + "description": "PHP Class for working with complex numbers", + "homepage": "https://github.com/MarkBaker/PHPComplex", + "keywords": [ + "complex", + "mathematics" + ], + "support": { + "issues": "https://github.com/MarkBaker/PHPComplex/issues", + "source": "https://github.com/MarkBaker/PHPComplex/tree/3.0.2" + }, + "time": "2022-12-06T16:21:08+00:00" + }, + { + "name": "markbaker/matrix", + "version": "3.0.1", + "source": { + "type": "git", + "url": "https://github.com/MarkBaker/PHPMatrix.git", + "reference": "728434227fe21be27ff6d86621a1b13107a2562c" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/MarkBaker/PHPMatrix/zipball/728434227fe21be27ff6d86621a1b13107a2562c", + "reference": "728434227fe21be27ff6d86621a1b13107a2562c", + "shasum": "" + }, + "require": { + "php": "^7.1 || ^8.0" + }, + "require-dev": { + "dealerdirect/phpcodesniffer-composer-installer": "dev-master", + "phpcompatibility/php-compatibility": "^9.3", + "phpdocumentor/phpdocumentor": "2.*", + "phploc/phploc": "^4.0", + "phpmd/phpmd": "2.*", + "phpunit/phpunit": "^7.0 || ^8.0 || ^9.0", + "sebastian/phpcpd": "^4.0", + "squizlabs/php_codesniffer": "^3.7" + }, + "type": "library", + "autoload": { + "psr-4": { + "Matrix\\": "classes/src/" + } + }, + "notification-url": 
"https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Mark Baker", + "email": "mark@demon-angel.eu" + } + ], + "description": "PHP Class for working with matrices", + "homepage": "https://github.com/MarkBaker/PHPMatrix", + "keywords": [ + "mathematics", + "matrix", + "vector" + ], + "support": { + "issues": "https://github.com/MarkBaker/PHPMatrix/issues", + "source": "https://github.com/MarkBaker/PHPMatrix/tree/3.0.1" + }, + "time": "2022-12-02T22:17:43+00:00" + }, { "name": "open-telemetry/api", "version": "1.1.1", @@ -1065,6 +1412,272 @@ }, "time": "2024-03-15T13:55:21+00:00" }, + { + "name": "phpoffice/math", + "version": "0.2.0", + "source": { + "type": "git", + "url": "https://github.com/PHPOffice/Math.git", + "reference": "fc2eb6d1a61b058d5dac77197059db30ee3c8329" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/PHPOffice/Math/zipball/fc2eb6d1a61b058d5dac77197059db30ee3c8329", + "reference": "fc2eb6d1a61b058d5dac77197059db30ee3c8329", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-xml": "*", + "php": "^7.1|^8.0" + }, + "require-dev": { + "phpstan/phpstan": "^0.12.88 || ^1.0.0", + "phpunit/phpunit": "^7.0 || ^9.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "PhpOffice\\Math\\": "src/Math/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Progi1984", + "homepage": "https://lefevre.dev" + } + ], + "description": "Math - Manipulate Math Formula", + "homepage": "https://phpoffice.github.io/Math/", + "keywords": [ + "MathML", + "officemathml", + "php" + ], + "support": { + "issues": "https://github.com/PHPOffice/Math/issues", + "source": "https://github.com/PHPOffice/Math/tree/0.2.0" + }, + "time": "2024-08-12T07:30:45+00:00" + }, + { + "name": "phpoffice/phpspreadsheet", + "version": "3.9.0", + "source": { + "type": "git", + "url": "https://github.com/PHPOffice/PhpSpreadsheet.git", + "reference": 
"414f8a2aa1d8b974b39f577c0677d5ebc96fab36" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/PHPOffice/PhpSpreadsheet/zipball/414f8a2aa1d8b974b39f577c0677d5ebc96fab36", + "reference": "414f8a2aa1d8b974b39f577c0677d5ebc96fab36", + "shasum": "" + }, + "require": { + "composer/pcre": "^3.3", + "ext-ctype": "*", + "ext-dom": "*", + "ext-fileinfo": "*", + "ext-gd": "*", + "ext-iconv": "*", + "ext-libxml": "*", + "ext-mbstring": "*", + "ext-simplexml": "*", + "ext-xml": "*", + "ext-xmlreader": "*", + "ext-xmlwriter": "*", + "ext-zip": "*", + "ext-zlib": "*", + "maennchen/zipstream-php": "^2.1 || ^3.0", + "markbaker/complex": "^3.0", + "markbaker/matrix": "^3.0", + "php": "^8.1", + "psr/http-client": "^1.0", + "psr/http-factory": "^1.0", + "psr/simple-cache": "^1.0 || ^2.0 || ^3.0" + }, + "require-dev": { + "dealerdirect/phpcodesniffer-composer-installer": "dev-main", + "dompdf/dompdf": "^2.0 || ^3.0", + "friendsofphp/php-cs-fixer": "^3.2", + "mitoteam/jpgraph": "^10.3", + "mpdf/mpdf": "^8.1.1", + "phpcompatibility/php-compatibility": "^9.3", + "phpstan/phpstan": "^1.1", + "phpstan/phpstan-phpunit": "^1.0", + "phpunit/phpunit": "^10.5", + "squizlabs/php_codesniffer": "^3.7", + "tecnickcom/tcpdf": "^6.5" + }, + "suggest": { + "dompdf/dompdf": "Option for rendering PDF with PDF Writer", + "ext-intl": "PHP Internationalization Functions", + "mitoteam/jpgraph": "Option for rendering charts, or including charts with PDF or HTML Writers", + "mpdf/mpdf": "Option for rendering PDF with PDF Writer", + "tecnickcom/tcpdf": "Option for rendering PDF with PDF Writer" + }, + "type": "library", + "autoload": { + "psr-4": { + "PhpOffice\\PhpSpreadsheet\\": "src/PhpSpreadsheet" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Maarten Balliauw", + "homepage": "https://blog.maartenballiauw.be" + }, + { + "name": "Mark Baker", + "homepage": "https://markbakeruk.net" + }, + { + "name": "Franck 
Lefevre", + "homepage": "https://rootslabs.net" + }, + { + "name": "Erik Tilt" + }, + { + "name": "Adrien Crivelli" + } + ], + "description": "PHPSpreadsheet - Read, Create and Write Spreadsheet documents in PHP - Spreadsheet engine", + "homepage": "https://github.com/PHPOffice/PhpSpreadsheet", + "keywords": [ + "OpenXML", + "excel", + "gnumeric", + "ods", + "php", + "spreadsheet", + "xls", + "xlsx" + ], + "support": { + "issues": "https://github.com/PHPOffice/PhpSpreadsheet/issues", + "source": "https://github.com/PHPOffice/PhpSpreadsheet/tree/3.9.0" + }, + "time": "2025-01-26T05:10:24+00:00" + }, + { + "name": "phpoffice/phpword", + "version": "1.3.0", + "source": { + "type": "git", + "url": "https://github.com/PHPOffice/PHPWord.git", + "reference": "8392134ce4b5dba65130ba956231a1602b848b7f" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/PHPOffice/PHPWord/zipball/8392134ce4b5dba65130ba956231a1602b848b7f", + "reference": "8392134ce4b5dba65130ba956231a1602b848b7f", + "shasum": "" + }, + "require": { + "ext-dom": "*", + "ext-json": "*", + "ext-xml": "*", + "php": "^7.1|^8.0", + "phpoffice/math": "^0.2" + }, + "require-dev": { + "dompdf/dompdf": "^2.0", + "ext-gd": "*", + "ext-libxml": "*", + "ext-zip": "*", + "friendsofphp/php-cs-fixer": "^3.3", + "mpdf/mpdf": "^8.1", + "phpmd/phpmd": "^2.13", + "phpstan/phpstan-phpunit": "@stable", + "phpunit/phpunit": ">=7.0", + "symfony/process": "^4.4 || ^5.0", + "tecnickcom/tcpdf": "^6.5" + }, + "suggest": { + "dompdf/dompdf": "Allows writing PDF", + "ext-gd2": "Allows adding images", + "ext-xmlwriter": "Allows writing OOXML and ODF", + "ext-xsl": "Allows applying XSL style sheet to headers, to main document part, and to footers of an OOXML template", + "ext-zip": "Allows writing OOXML and ODF" + }, + "type": "library", + "autoload": { + "psr-4": { + "PhpOffice\\PhpWord\\": "src/PhpWord" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "LGPL-3.0" + ], + "authors": [ + 
{ + "name": "Mark Baker" + }, + { + "name": "Gabriel Bull", + "email": "me@gabrielbull.com", + "homepage": "http://gabrielbull.com/" + }, + { + "name": "Franck Lefevre", + "homepage": "https://rootslabs.net/blog/" + }, + { + "name": "Ivan Lanin", + "homepage": "http://ivan.lanin.org" + }, + { + "name": "Roman Syroeshko", + "homepage": "http://ru.linkedin.com/pub/roman-syroeshko/34/a53/994/" + }, + { + "name": "Antoine de Troostembergh" + } + ], + "description": "PHPWord - A pure PHP library for reading and writing word processing documents (OOXML, ODF, RTF, HTML, PDF)", + "homepage": "https://phpoffice.github.io/PHPWord/", + "keywords": [ + "ISO IEC 29500", + "OOXML", + "Office Open XML", + "OpenDocument", + "OpenXML", + "PhpOffice", + "PhpWord", + "Rich Text Format", + "WordprocessingML", + "doc", + "docx", + "html", + "odf", + "odt", + "office", + "pdf", + "php", + "reader", + "rtf", + "template", + "template processor", + "word", + "writer" + ], + "support": { + "issues": "https://github.com/PHPOffice/PHPWord/issues", + "source": "https://github.com/PHPOffice/PHPWord/tree/1.3.0" + }, + "time": "2024-08-30T18:03:42+00:00" + }, { "name": "psr/http-client", "version": "1.0.3", @@ -1275,6 +1888,57 @@ }, "time": "2021-05-03T11:20:27+00:00" }, + { + "name": "psr/simple-cache", + "version": "3.0.0", + "source": { + "type": "git", + "url": "https://github.com/php-fig/simple-cache.git", + "reference": "764e0b3939f5ca87cb904f570ef9be2d78a07865" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/php-fig/simple-cache/zipball/764e0b3939f5ca87cb904f570ef9be2d78a07865", + "reference": "764e0b3939f5ca87cb904f570ef9be2d78a07865", + "shasum": "" + }, + "require": { + "php": ">=8.0.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0.x-dev" + } + }, + "autoload": { + "psr-4": { + "Psr\\SimpleCache\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": 
"PHP-FIG", + "homepage": "https://www.php-fig.org/" + } + ], + "description": "Common interfaces for simple caching", + "keywords": [ + "cache", + "caching", + "psr", + "psr-16", + "simple-cache" + ], + "support": { + "source": "https://github.com/php-fig/simple-cache/tree/3.0.0" + }, + "time": "2021-10-29T13:26:27+00:00" + }, { "name": "ralouphie/getallheaders", "version": "3.0.3", @@ -1319,6 +1983,57 @@ }, "time": "2019-03-08T08:55:37+00:00" }, + { + "name": "smalot/pdfparser", + "version": "v2.11.0", + "source": { + "type": "git", + "url": "https://github.com/smalot/pdfparser.git", + "reference": "ac8e6678b0940e4b2ccd5caadd3fb18e68093be6" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/smalot/pdfparser/zipball/ac8e6678b0940e4b2ccd5caadd3fb18e68093be6", + "reference": "ac8e6678b0940e4b2ccd5caadd3fb18e68093be6", + "shasum": "" + }, + "require": { + "ext-iconv": "*", + "ext-zlib": "*", + "php": ">=7.1", + "symfony/polyfill-mbstring": "^1.18" + }, + "type": "library", + "autoload": { + "psr-0": { + "Smalot\\PdfParser\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "LGPL-3.0" + ], + "authors": [ + { + "name": "Sebastien MALOT", + "email": "sebastien@malot.fr" + } + ], + "description": "Pdf parser library. 
Can read and extract information from pdf file.", + "homepage": "https://www.pdfparser.org", + "keywords": [ + "extract", + "parse", + "parser", + "pdf", + "text" + ], + "support": { + "issues": "https://github.com/smalot/pdfparser/issues", + "source": "https://github.com/smalot/pdfparser/tree/v2.11.0" + }, + "time": "2024-08-16T06:48:03+00:00" + }, { "name": "symfony/deprecation-contracts", "version": "v3.5.0", @@ -1922,6 +2637,55 @@ ], "time": "2024-09-17T12:47:12+00:00" }, + { + "name": "thiagoalessio/tesseract_ocr", + "version": "2.13.0", + "source": { + "type": "git", + "url": "https://github.com/thiagoalessio/tesseract-ocr-for-php.git", + "reference": "232a8cb9d571992f9bd1e263f2f6909cf6c173a1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/thiagoalessio/tesseract-ocr-for-php/zipball/232a8cb9d571992f9bd1e263f2f6909cf6c173a1", + "reference": "232a8cb9d571992f9bd1e263f2f6909cf6c173a1", + "shasum": "" + }, + "require": { + "php": "^5.3 || ^7.0 || ^8.0" + }, + "require-dev": { + "phpunit/php-code-coverage": "^2.2.4 || ^9.0.0" + }, + "type": "library", + "autoload": { + "psr-4": { + "thiagoalessio\\TesseractOCR\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "thiagoalessio", + "email": "thiagoalessio@me.com" + } + ], + "description": "A wrapper to work with Tesseract OCR inside PHP.", + "keywords": [ + "OCR", + "Tesseract", + "text recognition" + ], + "support": { + "irc": "irc://irc.freenode.net/tesseract-ocr-for-php", + "issues": "https://github.com/thiagoalessio/tesseract-ocr-for-php/issues", + "source": "https://github.com/thiagoalessio/tesseract-ocr-for-php" + }, + "time": "2023-10-05T21:14:48+00:00" + }, { "name": "twig/twig", "version": "v3.18.0", @@ -3043,7 +3807,10 @@ "prefer-stable": false, "prefer-lowest": false, "platform": { - "php": "^8.1" + "php": "^8.1", + "ext-exif": "*", + "ext-simplexml": "*", + "ext-zip": "*" }, "platform-dev": [], 
"platform-overrides": { diff --git a/lib/Controller/FilesController.php b/lib/Controller/FilesController.php new file mode 100644 index 0000000..0832885 --- /dev/null +++ b/lib/Controller/FilesController.php @@ -0,0 +1,66 @@ +request->getParams(); + + try { + $results = $this->fileService->search(searchTerm: $data['_search'], folderPath: $data['_folderPath'] ?? null); + + return new JSONResponse(['results' => $results]); + } catch (InvalidPathException|NotFoundException|NotPermittedException $e) { + return new JSONResponse(['error' => $e->getMessage()], 500); + } + } +} diff --git a/lib/Service/FileService.php b/lib/Service/FileService.php index edd20d3..473c6de 100644 --- a/lib/Service/FileService.php +++ b/lib/Service/FileService.php @@ -2,15 +2,15 @@ namespace OCA\OpenRegister\Service; -use DateTime; use Exception; +use DateTime; use OCA\OpenRegister\Db\ObjectEntity; use OCA\OpenRegister\Db\Register; use OCA\OpenRegister\Db\RegisterMapper; use OCA\OpenRegister\Db\Schema; use OCA\OpenRegister\Db\SchemaMapper; -use OCP\AppFramework\Http\JSONResponse; use OCP\Files\File; +use OCP\Files\Folder; use OCP\Files\GenericFileException; use OCP\Files\InvalidPathException; use OCP\Files\IRootFolder; @@ -19,13 +19,17 @@ use OCP\Files\NotPermittedException; use OCP\IConfig; use OCP\IGroupManager; -use OCP\IRequest; use OCP\IURLGenerator; use OCP\IUserSession; use OCP\Lock\LockedException; use OCP\Share\IManager; use OCP\Share\IShare; +use PhpOffice\PhpWord\IOFactory; use Psr\Log\LoggerInterface; +use Smalot\PdfParser\Parser; +use Symfony\Component\Yaml\Yaml; +use thiagoalessio\TesseractOCR\TesseractOCR; +use ZipArchive; /** * Service for handling file operations in OpenRegister. 
@@ -200,7 +204,7 @@ public function getObjectFolder( Schema|int|null $schema = null ): ?Node { - if($objectEntity->getFolder() === null) { + if ($objectEntity->getFolder() === null) { $folderPath = $this->getObjectFolderPath( objectEntity: $objectEntity, register: $register, @@ -298,7 +302,7 @@ private function getObjectFolderName(ObjectEntity $objectEntity): string * * @return string The share link needed to get the file or folder for the given IShare object. */ - private function getFolderLink(string $folderPath): string + public function getFolderLink(string $folderPath): string { $folderPath = str_replace('%2F', '/', urlencode($folderPath)); return $this->getCurrentDomain() . "/index.php/apps/files/files?dir=$folderPath"; @@ -348,13 +352,312 @@ private function getNode(string $path): ?Node $userFolder = $this->rootFolder->getUserFolder(userId: $currentUser ? $currentUser->getUID() : 'Guest'); try { - return $node = $userFolder->get(path: $path); + return $userFolder->get(path: $path); } catch (NotFoundException $e) { $this->logger->error(message: $e->getMessage()); return null; } } + /** + * Perform a search on all files in a given folder / from a specific path. Find all files where content contains the search term. + * + * @param string $searchTerm The term to search for in file content. + * @param string|null $folderPath The folder to search files in. + * + * @return array The search results containing matching files. + * @throws InvalidPathException + * @throws NotFoundException + * @throws NotPermittedException + */ + public function search(string $searchTerm, string $folderPath = null): array { + $results = []; + + // Get the current user. + $currentUser = $this->userSession->getUser(); + $userFolder = $this->rootFolder->getUserFolder(userId: $currentUser ? 
$currentUser->getUID() : 'Guest'); + + try { + $rootFolder = $userFolder->get(self::ROOT_FOLDER); + } catch(NotFoundException $exception) { + $rootFolder = $userFolder->newFolder(self::ROOT_FOLDER); + } + + $searchFolder = $rootFolder; + if (empty($folderPath) === false) { + try { + $searchFolder = $userFolder->get($folderPath); + } catch(NotFoundException $exception) { + $this->logger->error(message: 'Could not find this folder to search in: ' . $folderPath); + } + } + + try { + // Recursively search files + $this->searchFilesContext(folder: $searchFolder, searchTerm: $searchTerm, results: $results); + } catch (Exception $e) { + $this->logger->error(message: 'Error during search: ' . $e->getMessage()); + } + + return $this->formatFiles($results); + } + + /** + * Recursively search for files in a folder where the content of the file contain the search term. + * + * @param Folder $folder The folder to search within. + * @param string $searchTerm The term to search for in file content. + * @param array &$results The array to store search results. + * + * @return void + * @throws NotFoundException + */ + private function searchFilesContext(Folder $folder, string $searchTerm, array &$results): void + { + foreach ($folder->getDirectoryListing() as $node) { + if ($node instanceof File) { + try { + $content = $this->decodeFileContent($node); + } catch (Exception $e) { + $this->logger->error(message: 'searchFilesByContext error: ' . $e->getMessage()); + continue; + } + + // Check if the file content contains the search term + if (str_contains(strtolower($content), strtolower($searchTerm))) { + $results[] = $node; + } + } elseif ($node instanceof Folder) { + // Recursively search subfolders + $this->searchFilesContext(folder: $node, searchTerm: $searchTerm, results: $results); + } + } + } + + /** + * Decodes the content of a file based on its extension and returns the extracted or formatted text. 
+ * + * This function supports the following file types: + * - DOC/DOCX: Extracts text content using the PhpOffice\PhpWord\IOFactory. + * - PDF: Extracts text content using the Smalot\PdfParser\Parser. + * - JSON: Decodes the JSON content into an associative array and re-encodes it with pretty printing. + * - YAML: Parses the YAML content and converts it back to a YAML string if valid. + * + * If the file type is unsupported, an error is logged, and the function returns null. + * + * @param File $file The file whose content needs to be decoded. + * + * @return string|null The decoded content of the file as a string, or null if decoding fails + * or the file type is unsupported. + * @throws GenericFileException + * @throws LockedException + * @throws NotPermittedException + */ + private function decodeFileContent(File $file): ?string + { + $content = $file->getContent(); + + switch (strtolower($file->getExtension())) { + case 'txt': + case 'md': + case 'log': + return $content; + case 'html': + case 'htm': + return strip_tags($content); + case 'docx': + case 'doc': + return $this->decodeWordFileContent($content); + case 'xlsx': + case 'xls': + return $this->decodeExcelFileContent($content); + case 'csv': + $rows = array_map('str_getcsv', explode(PHP_EOL, $content)); + return json_encode($rows, JSON_PRETTY_PRINT); + case 'pdf': + $parser = new Parser(); + $pdf = $parser->parseContent($content); + return $pdf->getText(); + case 'json': + $data = json_decode($content, true); + return json_last_error() === JSON_ERROR_NONE ? json_encode($data, JSON_PRETTY_PRINT) : ''; + case 'yaml': + $data = Yaml::parse($content); + return is_array($data) ? Yaml::dump($data) : ''; + case 'xml': + $xml = simplexml_load_string($content); + return $xml ? 
$xml->asXML() : ''; + case 'zip': + return $this->decodeArchiveContent($content); + case 'jpg': + case 'jpeg': + case 'png': + case 'gif': + return $this->decodeImageContent($content); + default: + $this->logger->error(message: 'Unsupported file extension/type cannot decode it\'s content'); + return null; + } + } + + /** + * Decodes the content of a Word file and extracts the text from it. + * + * This function processes the binary content of a Word document (e.g., .docx or .doc), + * saves it temporarily to the file system, and uses `PhpOffice\PhpWord\IOFactory` + * to load and parse the file. The text content is extracted from the document's sections + * and elements and returned as a plain string. + * + * @param string $content The binary content of the Word file. + * + * @return string The extracted text content of the Word file. + */ + private function decodeWordFileContent(string $content): string + { + $tempFile = tempnam(sys_get_temp_dir(), 'word_'); + file_put_contents($tempFile, $content); + + $phpWord = IOFactory::load($tempFile); + unlink($tempFile); + + $text = ''; + foreach ($phpWord->getSections() as $section) { + foreach ($section->getElements() as $element) { + if (method_exists($element, 'getText')) { + $text .= $element->getText() . "\n"; + } + } + } + + return $text; + } + + /** + * Decodes the content of an Excel file and extracts its data as a JSON string. + * + * This function processes the binary content of an Excel file (e.g., `.xlsx` or `.xls`) + * by saving it temporarily to the file system, loading it using `PhpOffice\PhpSpreadsheet`, + * and converting the active sheet's data into a structured array. The data is then encoded + * as a pretty-printed JSON string. + * + * @param string $content The binary content of the Excel file. + * + * @return string The extracted sheet data in JSON format, with cell references as keys. + * @throws \PhpOffice\PhpSpreadsheet\Reader\Exception If the file cannot be read or parsed. 
+     */
+    private function decodeExcelFileContent(string $content): string
+    {
+        $tempFile = tempnam(sys_get_temp_dir(), 'excel_');
+        file_put_contents($tempFile, $content);
+        try {
+            $spreadsheet = \PhpOffice\PhpSpreadsheet\IOFactory::load($tempFile);
+        } finally {
+            unlink($tempFile); // Always remove the temp file, even when loading throws.
+        }
+        return json_encode($spreadsheet->getActiveSheet()->toArray(returnCellRef: true), JSON_PRETTY_PRINT);
+    }
+
+    /**
+     * Decodes the content of a ZIP archive and extracts a list of contained files.
+     *
+     * This function processes the binary content of a ZIP archive by temporarily saving it to the
+     * file system, opening it with PHP's `ZipArchive`, and retrieving the names of all files within
+     * the archive. The extracted file list is returned as a pretty-printed JSON string.
+     *
+     * @param string $content The binary content of the ZIP archive.
+     *
+     * @return string A JSON-encoded array containing the names of files in the archive.
+     *                An archive that cannot be opened yields an empty list ("[]").
+     */
+    private function decodeArchiveContent(string $content): string
+    {
+        $tempFile = tempnam(sys_get_temp_dir(), 'zip_');
+        file_put_contents($tempFile, $content);
+
+        $fileList = []; // Stays empty when the archive cannot be opened.
+        $zip = new ZipArchive();
+        if ($zip->open($tempFile) === true) {
+            for ($i = 0; $i < $zip->numFiles; $i++) {
+                $fileList[] = $zip->getNameIndex($i);
+            }
+            $zip->close();
+        }
+        unlink($tempFile);
+
+        return json_encode($fileList, JSON_PRETTY_PRINT);
+    }
+
+    /**
+     * Decodes the content of an image file by extracting its text with OCR.
+     *
+     * This function processes the binary content of an image file (e.g., `.jpg`, `.png`, `.gif`)
+     * by temporarily saving it to the file system and running Tesseract OCR on it with the
+     * Dutch ('nld') language setting. The temporary file is removed afterwards, also when the
+     * OCR run throws. If no text is recognised, a default message is returned.
+     *
+     * @param string $content The binary content of the image file.
+     *
+     * @return string The text recognised in the image, or 'No text detected' when the OCR
+     *                result is empty.
+     * @throws Exception If Tesseract fails to process the image.
+     */
+    private function decodeImageContent(string $content): string
+    {
+        // Save the image temporarily
+        $tempFile = tempnam(sys_get_temp_dir(), 'img_');
+        file_put_contents($tempFile, $content);
+        try {
+            // Use Tesseract OCR to extract text
+            $text = (new TesseractOCR($tempFile))
+                ->lang('nld') // Set language to Dutch ('nld')
+                ->run();
+        } finally {
+            // Remove temp file, also when the OCR run throws
+            unlink($tempFile);
+        }
+        return $text ?: 'No text detected';
+    }
+
+    /**
+     * Formats an array of Node files into an array of metadata arrays.
+     *
+     * See https://nextcloud-server.netlify.app/classes/ocp-files-file for the Nextcloud documentation on the File class
+     * See https://nextcloud-server.netlify.app/classes/ocp-files-node for the Nextcloud documentation on the Node superclass
+     *
+     * @param Node[] $files Array of Node files to format
+     *
+     * @return array Array of formatted file metadata arrays
+     * @throws InvalidPathException
+     * @throws NotFoundException
+     */
+    public function formatFiles(array $files): array
+    {
+        $formattedFiles = [];
+
+        foreach($files as $file) {
+            // IShare documentation see https://nextcloud-server.netlify.app/classes/ocp-share-ishare
+            $shares = $this->findShares($file);
+
+            $formattedFile = [
+                'id' => $file->getId(),
+                'path' => $file->getPath(),
+                'title' => $file->getName(),
+                'accessUrl' => count($shares) > 0 ? $this->getShareLink($shares[0]) : null,
+                'downloadUrl' => count($shares) > 0 ? $this->getShareLink($shares[0]).'/download' : null,
+                'type' => $file->getMimetype(),
+                'extension' => $file->getExtension(),
+                'size' => $file->getSize(),
+                'hash' => $file->getEtag(),
+                'published' => (new DateTime())->setTimestamp($file->getCreationTime())->format('c'),
+                'modified' => (new DateTime())->setTimestamp($file->getUploadTime())->format('c'),
+            ];
+
+            $formattedFiles[] = $formattedFile;
+        }
+
+        return $formattedFiles;
+    }
+
     /**
      * @param Node $file
      * @param int $shareType
@@ -411,36 +714,6 @@ public function findShare(string $path, ?int $shareType = 3): ?IShare
         return null;
     }
 
-    /**
-     * Share a file or folder with a user group in Nextcloud.
-     *
-     * @param int $nodeId The file or folder to share.
-     * @param string $nodeType 'file' or 'folder', the type of node.
-     * @param string $target The target folder to share the node in.
-     * @param int $permissions Permissions the group members will have in the folder.
-     * @param string $groupId The id of the group to share the folder with.
-     *
-     * @return IShare The resulting share
-     * @throws Exception
-     */
-    private function shareWithGroup(int $nodeId, string $nodeType, string $target, int $permissions, string $groupId): IShare
-    {
-        $share = $this->shareManager->newShare();
-        $share->setTarget(target: '/'. $target);
-        $share->setNodeId(fileId:$nodeId);
-        $share->setNodeType(type:$nodeType);
-
-        $share->setShareType(shareType: 1);
-        $share->setPermissions(permissions: $permissions);
-        $share->setSharedBy(sharedBy:$this->userSession->getUser()->getUID());
-        $share->setShareOwner(shareOwner:$this->userSession->getUser()->getUID());
-        $share->setShareTime(shareTime: new DateTime());
-        $share->setSharedWith(sharedWith: $groupId);
-        $share->setStatus(status: $share::STATUS_ACCEPTED);
-
-        return $this->shareManager->createShare($share);
-    }
-
     /**
      * Creates a IShare object using the $shareData array data.
*
@@ -454,8 +727,13 @@ private function createShare(array $shareData) :IShare
         // Create a new share
         $share = $this->shareManager->newShare();
         $share->setTarget(target: '/'.$shareData['path']);
-        $share->setNodeId(fileId: $shareData['file']->getId());
-        $share->setNodeType(type: 'file');
+        if (empty($shareData['file']) === false) {
+            $share->setNodeId(fileId: $shareData['file']->getId());
+        }
+        if (empty($shareData['nodeId']) === false) { // Checked after 'file', so an explicit nodeId is the one set last.
+            $share->setNodeId(fileId: $shareData['nodeId']);
+        }
+        $share->setNodeType(type: $shareData['nodeType'] ?? 'file'); // Falls back to 'file' when no nodeType is given.
         $share->setShareType(shareType: $shareData['shareType']);
         if ($shareData['permissions'] !== null) {
             $share->setPermissions(permissions: $shareData['permissions']);
@@ -463,6 +741,9 @@ private function createShare(array $shareData) :IShare
         $share->setSharedBy(sharedBy: $shareData['userId']);
         $share->setShareOwner(shareOwner: $shareData['userId']);
         $share->setShareTime(shareTime: new DateTime());
+        if (empty($shareData['sharedWith']) === false) { // Only set a recipient when one was supplied.
+            $share->setSharedWith(sharedWith: $shareData['sharedWith']);
+        }
         $share->setStatus(status: $share::STATUS_ACCEPTED);
 
         return $this->shareManager->createShare(share: $share);
@@ -551,17 +832,19 @@ public function createFolder(string $folderPath): bool
         } catch(NotFoundException $exception) {
             $rootFolder = $userFolder->newFolder(self::ROOT_FOLDER);
 
-            if($this->groupManager->groupExists(self::APP_GROUP) === false) {
+            if ($this->groupManager->groupExists(self::APP_GROUP) === false) {
                 $this->groupManager->createGroup(self::APP_GROUP);
             }
 
-            $this->shareWithGroup(
-                nodeId: $rootFolder->getId(),
-                nodeType: $rootFolder->getType() === 'file' ? $rootFolder->getType() : 'folder',
-                target: self::ROOT_FOLDER,
-                permissions: 31,
-                groupId: self::APP_GROUP
-            );
+            $this->createShare([
+                'path' => self::ROOT_FOLDER,
+                'nodeId' => $rootFolder->getId(),
+                'nodeType' => $rootFolder->getType() === 'file' ? $rootFolder->getType() : 'folder',
+                'shareType' => 1, // Group share, as the share is made with APP_GROUP — presumably IShare::TYPE_GROUP; confirm.
+                'permissions' => 31, // NOTE(review): presumably all permissions (OCP\Constants::PERMISSION_ALL) — confirm.
+                'userId' => $this->userSession->getUser()->getUID(),
+                'sharedWith' => self::APP_GROUP
+            ]);
         }
 
         try {
diff --git a/lib/Service/ObjectService.php b/lib/Service/ObjectService.php
index 5affa5c..ce15221 100644
--- a/lib/Service/ObjectService.php
+++ b/lib/Service/ObjectService.php
@@ -24,6 +24,8 @@
 use OCP\App\IAppManager;
 use OCP\Files\Events\Node\NodeCreatedEvent;
 use OCP\Files\Folder;
+use OCP\Files\InvalidPathException;
+use OCP\Files\NotFoundException;
 use OCP\IAppConfig;
 use OCP\IURLGenerator;
 use Opis\JsonSchema\ValidationResult;
@@ -1310,7 +1312,7 @@ private function handleFileProperty(ObjectEntity $objectEntity, array $object, s
      *
      * @param ObjectEntity|string $object The object or object ID to fetch files for
      * @return Node[] The files found
-     * @throws \OCP\Files\NotFoundException If the folder is not found
+     * @throws NotFoundException If the folder is not found
      * @throws DoesNotExistException If the object ID is not found
      */
     public function getFiles(ObjectEntity|string $object): array
@@ -1333,43 +1335,6 @@ public function getFiles(ObjectEntity|string $object): array
         return $files;
     }
 
-    /**
-     * Formats an array of Node files into an array of metadata arrays.
-     *
-     * See https://nextcloud-server.netlify.app/classes/ocp-files-file for the Nextcloud documentation on the File class
-     * See https://nextcloud-server.netlify.app/classes/ocp-files-node for the Nextcloud documentation on the Node superclass
-     *
-     * @param Node[] $files Array of Node files to format
-     * @return array Array of formatted file metadata arrays
-     */
-    public function formatFiles(array $files): array
-    {
-        $formattedFiles = [];
-
-        foreach($files as $file) {
-            // IShare documentation see https://nextcloud-server.netlify.app/classes/ocp-share-ishare
-            $shares = $this->fileService->findShares($file);
-
-            $formattedFile = [
-                'id' => $file->getId(),
-                'path' => $file->getPath(),
-                'title' => $file->getName(),
-                'accessUrl' => count($shares) > 0 ? $this->fileService->getShareLink($shares[0]) : null,
-                'downloadUrl' => count($shares) > 0 ? $this->fileService->getShareLink($shares[0]).'/download' : null,
-                'type' => $file->getMimetype(),
-                'extension' => $file->getExtension(),
-                'size' => $file->getSize(),
-                'hash' => $file->getEtag(),
-                'published' => (new DateTime())->setTimestamp($file->getCreationTime())->format('c'),
-                'modified' => (new DateTime())->setTimestamp($file->getUploadTime())->format('c'),
-            ];
-
-            $formattedFiles[] = $formattedFile;
-        }
-
-        return $formattedFiles;
-    }
-
     /**
      * Hydrate files array with metadata.
      *
@@ -1382,7 +1347,12 @@ public function formatFiles(array $files): array
      */
     public function hydrateFiles(ObjectEntity $object, array $files): ObjectEntity
     {
-        $formattedFiles = $this->formatFiles($files);
+        try {
+            $formattedFiles = $this->fileService->formatFiles($files);
+        } catch (InvalidPathException|NotFoundException $e) {
+            $formattedFiles = []; // Formatting failed: fall back to an empty list instead of an undefined variable.
+        }
+
         $object->setFiles($formattedFiles);
         return $object;
     }