Skip to content

Commit

Permalink
Fix#273 (#290)
Browse files Browse the repository at this point in the history
* Check if user can update node

...and add suffix '_OCR' to processed file if not.

Signed-off-by: XueSheng-GIT <Michael.Frase@gmx.de>

* Node update permissions adjustments

* Create dedicated private function for determining new file name
* Adjust and add tests
* Update Composer Deps

---------

Signed-off-by: XueSheng-GIT <Michael.Frase@gmx.de>
Co-authored-by: XueSheng-GIT <Michael.Frase@gmx.de>
  • Loading branch information
R0Wi and XueSheng-GIT authored Jan 15, 2025
1 parent d112c22 commit fe820e1
Show file tree
Hide file tree
Showing 8 changed files with 119 additions and 120 deletions.
36 changes: 18 additions & 18 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions lib/Helper/IProcessingFileAccessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@

interface IProcessingFileAccessor {
/**
* Returns the id of the file which is currently
* Returns the path of the file which is currently
* processed via OCR
* @return ?int
* @return ?string
*/
public function getCurrentlyProcessedFileId() : ?int;
public function getCurrentlyProcessedFilePath() : ?string;

/**
* Sets the id of the file which is currently
* Sets the path of the file which is currently
* processed via OCR
*/
public function setCurrentlyProcessedFileId(?int $fileId) : void;
public function setCurrentlyProcessedFilePath(?string $filePath) : void;
}
14 changes: 7 additions & 7 deletions lib/Helper/ProcessingFileAccessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@
namespace OCA\WorkflowOcr\Helper;

/**
* This class is a singleton which holds the id
* This class is a singleton which holds the path
* of the currently OCR processed file. This ensures
* that a file is not added to the processing queue
* if the 'postWrite' hook was triggered by a new
* version created by the OCR process.
*/
class ProcessingFileAccessor implements IProcessingFileAccessor {
/** @var ?int */
private $currentlyProcessedFileId;
/** @var ?string */
private $currentlyProcessedFilePath;

/** @var ProcessingFileAccessor */
private static $instance;
Expand All @@ -50,14 +50,14 @@ private function __construct() {
/**
* @inheritdoc
*/
public function getCurrentlyProcessedFileId() : ?int {
return $this->currentlyProcessedFileId;
public function getCurrentlyProcessedFilePath() : ?string {
return $this->currentlyProcessedFilePath;
}

/**
* @inheritdoc
*/
public function setCurrentlyProcessedFileId(?int $fileId) : void {
$this->currentlyProcessedFileId = $fileId;
public function setCurrentlyProcessedFilePath(?string $filePath) : void {
$this->currentlyProcessedFilePath = $filePath;
}
}
2 changes: 1 addition & 1 deletion lib/Operation.php
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ private function tryGetJobArgs(Node $node, $operation, & $argsArray) : bool {

private function eventTriggeredByOcrProcess(Node $node) : bool {
// Check if the event was triggered by OCR rewrite of the file
if ($node->getId() === $this->processingFileAccessor->getCurrentlyProcessedFileId()) {
if ($node->getPath() === $this->processingFileAccessor->getCurrentlyProcessedFilePath()) {
$this->logger->debug('Not processing event because file with path \'{path}\' was written by OCR process.',
['path' => $node->getPath()]);
return true;
Expand Down
41 changes: 30 additions & 11 deletions lib/Service/OcrService.php
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,13 @@ private function processTagsAfterSuccessfulOcr(File $file, WorkflowSettings $set
/**
* @param string $filePath The filepath of the file to write
* @param string $ocrContent The new filecontent (which was OCR processed)
* @param int $fileId The id of the file to write. Used for locking.
* @param ?int $fileMtime The mtime of the new file. Can be used to restore the original modification time of the non-OCR file.
*/
private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId, ?int $fileMtime = null) : void {
private function createNewFileVersion(string $filePath, string $ocrContent, ?int $fileMtime = null) : void {
$dirPath = dirname($filePath);
$filename = basename($filePath);

$this->processingFileAccessor->setCurrentlyProcessedFileId($fileId);
$this->processingFileAccessor->setCurrentlyProcessedFilePath($filePath);

try {
$view = $this->viewFactory->create($dirPath);
Expand All @@ -267,7 +266,7 @@ private function createNewFileVersion(string $filePath, string $ocrContent, int
$view->touch($filename, $fileMtime);
}
} finally {
$this->processingFileAccessor->setCurrentlyProcessedFileId(null);
$this->processingFileAccessor->setCurrentlyProcessedFilePath(null);
}
}

Expand Down Expand Up @@ -307,24 +306,20 @@ private function setFileVersionsLabel(File $file, string $uid, string $label): v
private function doPostProcessing(Node $file, string $uid, WorkflowSettings $settings, OcrProcessorResult $result, ?int $fileMtime = null): void {
$this->processTagsAfterSuccessfulOcr($file, $settings);

$filePath = $file->getPath();
$fileId = $file->getId();
$fileContent = $result->getFileContent();
$originalFileExtension = $file->getExtension();
$newFileExtension = $result->getFileExtension();

// Only create a new file version if the file OCR result was not empty #130
if ($result->getRecognizedText() !== '') {
if ($settings->getKeepOriginalFileVersion()) {
if ($settings->getKeepOriginalFileVersion() && $file->isUpdateable()) {
// Add label to original file to prevent its expiry
$this->setFileVersionsLabel($file, $uid, self::FILE_VERSION_LABEL_VALUE);
}

$newFilePath = $originalFileExtension === $newFileExtension ?
$filePath :
$filePath . '.pdf';

$this->createNewFileVersion($newFilePath, $fileContent, $fileId, $fileMtime);
$newFilePath = $this->determineNewFilePath($file, $originalFileExtension, $newFileExtension);
$this->createNewFileVersion($newFilePath, $fileContent, $fileMtime);
}

$this->eventService->textRecognized($result, $file);
Expand All @@ -333,4 +328,28 @@ private function doPostProcessing(Node $file, string $uid, WorkflowSettings $set
$this->notificationService->createSuccessNotification($uid, $fileId);
}
}

/**
 * Resolves the path to which the OCR result for the given file should be written.
 *
 * Three outcomes are possible:
 *  - the extension changed: the new extension is appended to the original path,
 *  - the extension is unchanged but the node reports it is not updateable:
 *    a sibling path with the suffix '_OCR' in front of the extension is returned,
 *  - otherwise: the original path is returned unchanged.
 *
 * @param Node $file The original file node for which OCR processing has succeeded.
 * @param string $originalFileExtension The extension of the original file.
 * @param string $newFileExtension The extension of the OCR result.
 * @return string The path the OCR result should be written to.
 */
private function determineNewFilePath(Node $file, string $originalFileExtension, string $newFileExtension): string {
    $originalPath = $file->getPath();
    $extensionUnchanged = $originalFileExtension === $newFileExtension;

    if (!$extensionUnchanged) {
        // A different extension means a separate file is written next to the original one
        return $originalPath . '.' . $newFileExtension;
    }

    if ($file->isUpdateable()) {
        // Same path as the original: the result is written over the original file
        return $originalPath;
    }

    // The original file must not be touched, so fall back to a '_OCR'-suffixed sibling
    $pathParts = pathinfo($originalPath);
    $directory = $pathParts['dirname'];
    $suffixedName = $pathParts['filename'] . '_OCR.' . $newFileExtension;

    return $directory . '/' . $suffixedName;
}
}
6 changes: 3 additions & 3 deletions tests/Unit/Helper/ProcessingFIleAccessorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ public function testSingleton() {

public function testGetSet() {
$o = ProcessingFileAccessor::getInstance();
$o ->setCurrentlyProcessedFileId(42);
$this->assertEquals(42, $o->getCurrentlyProcessedFileId());
$o->setCurrentlyProcessedFileId(null);
$o ->setCurrentlyProcessedFilePath('/someuser/files/somefile.pdf');
$this->assertEquals('/someuser/files/somefile.pdf', $o->getCurrentlyProcessedFilePath());
$o->setCurrentlyProcessedFilePath(null);
}
}
Loading

0 comments on commit fe820e1

Please sign in to comment.