Skip to content

Commit

Permalink
Skip notifications when OCR mode is "skip file"
Browse files Browse the repository at this point in the history
* Fixes #232
* Move logic to OcrService
  • Loading branch information
R0Wi committed Oct 3, 2023
1 parent a2d974d commit 1b6e814
Show file tree
Hide file tree
Showing 10 changed files with 568 additions and 551 deletions.
116 changes: 1 addition & 115 deletions lib/BackgroundJobs/ProcessFileJob.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,47 +52,19 @@
class ProcessFileJob extends \OCP\BackgroundJob\QueuedJob {
/** @var LoggerInterface */
protected $logger;
/** @var IRootFolder */
private $rootFolder;
/** @var IOcrService */
private $ocrService;
/** @var IEventService */
private $eventService;
/** @var IViewFactory */
private $viewFactory;
/** @var IFilesystem */
private $filesystem;
/** @var IUserManager */
private $userManager;
/** @var IUserSession */
private $userSession;
/** @var IProcessingFileAccessor */
private $processingFileAccessor;
/** @var INotificationService */
private $notificationService;

/**
 * Stores the injected collaborators on the job instance.
 *
 * The time factory is handed to the parent job constructor; every
 * other dependency is kept in the matching property of this class.
 */
public function __construct(
	LoggerInterface $logger,
	IRootFolder $rootFolder,
	IOcrService $ocrService,
	IEventService $eventService,
	IViewFactory $viewFactory,
	IFilesystem $filesystem,
	IUserManager $userManager,
	IUserSession $userSession,
	IProcessingFileAccessor $processingFileAccessor,
	INotificationService $notificationService,
	ITimeFactory $timeFactory
) {
	parent::__construct($timeFactory);

	// Logging and user notification
	$this->logger = $logger;

	// File access and OCR processing
	$this->rootFolder = $rootFolder;
	$this->ocrService = $ocrService;
	$this->eventService = $eventService;
	$this->viewFactory = $viewFactory;
	$this->filesystem = $filesystem;

	// User environment handling
	$this->userManager = $userManager;
	$this->userSession = $userSession;

	// Re-queue protection and notifications
	$this->processingFileAccessor = $processingFileAccessor;
	$this->notificationService = $notificationService;
}

Expand All @@ -104,13 +76,10 @@ protected function run($argument) : void {

try {
[$fileId, $uid, $settings] = $this->parseArguments($argument);
$this->initUserEnvironment($uid);
$this->processFile($fileId, $settings);
$this->ocrService->runOcrProcess($fileId, $uid, $settings);
} catch (\Throwable $ex) {
$this->logger->error($ex->getMessage(), ['exception' => $ex]);
$this->notificationService->createErrorNotification($uid, 'An error occured while executing the OCR process ('.$ex->getMessage().'). Please have a look at your servers logfile for more details.');
} finally {
$this->shutdownUserEnvironment();
}

$this->logger->debug('ENDED -- Run ' . self::class . ' job. Argument: {argument}.', ['argument' => $argument]);
Expand All @@ -135,87 +104,4 @@ private function parseArguments($argument) : array {
$settings
];
}

/**
 * Runs OCR for one file, stores the result as a new file version when
 * text was recognized, and finally reports the result via the event service.
 *
 * @param int $fileId The id of the file to be processed
 * @param WorkflowSettings $settings The settings to be used for processing
 */
private function processFile(int $fileId, WorkflowSettings $settings) : void {
	$node = $this->getNode($fileId);
	$ocrFile = $this->ocrService->ocrFile($node, $settings);

	// Read everything up front, in the same order as before.
	$sourcePath = $node->getPath();
	$ocrContent = $ocrFile->getFileContent();
	$sourceExtension = $node->getExtension();
	$ocrExtension = $ocrFile->getFileExtension();

	// An empty OCR result must not lead to a new file version (#130).
	if ($ocrFile->getRecognizedText() !== '') {
		$targetPath = $sourcePath;
		if ($sourceExtension !== $ocrExtension) {
			// The extension changed, so the result is written next to the original.
			$targetPath .= ".pdf";
		}

		$this->createNewFileVersion($targetPath, $ocrContent, $fileId);
	}

	$this->eventService->textRecognized($ocrFile, $node);
}

/**
 * Looks up the node for the given file id via the root folder.
 *
 * @param int $fileId The id of the file to look up
 *
 * @throws NotFoundException If the lookup yields no node at all
 * @throws \InvalidArgumentException If the first node found is not a file
 */
private function getNode(int $fileId) : ?Node {
	/** @var File[] */
	$candidates = $this->rootFolder->getById($fileId);
	if (count($candidates) === 0) {
		throw new NotFoundException('Could not process file with id \'' . $fileId . '\'. File was not found');
	}

	// Only the first hit is considered.
	$candidate = array_shift($candidates);

	$isRegularFile = $candidate instanceof Node && $candidate->getType() === FileInfo::TYPE_FILE;
	if (!$isRegularFile) {
		throw new \InvalidArgumentException('Skipping process for file with id \'' . $fileId . '\'. It is not a file');
	}

	return $candidate;
}

/**
 * Sets the given user on the user session and initializes the
 * filesystem for that user's files folder.
 *
 * @param string $userId The owners userId of the file to be processed
 *
 * @throws NoUserException If the user manager returns no user for the id
 */
private function initUserEnvironment(string $userId) : void {
	/** @var IUser */
	$owner = $this->userManager->get($userId);
	if (!$owner) {
		throw new NoUserException("User with uid '$userId' was not found");
	}

	$filesRoot = '/' . $userId . '/files';

	$this->userSession->setUser($owner);
	$this->filesystem->init($userId, $filesRoot);
}

/**
 * Clears the user that was set on the user session.
 */
private function shutdownUserEnvironment() : void {
	$this->userSession->setUser(null);
}

/**
 * Writes the OCR result to the given path through a view on its directory.
 *
 * @param string $filePath The filepath of the file to write
 * @param string $ocrContent The new filecontent (which was OCR processed)
 * @param int $fileId The id of the file to write. Used for locking.
 */
private function createNewFileVersion(string $filePath, string $ocrContent, int $fileId) : void {
	$directory = dirname($filePath);
	$name = basename($filePath);
	$accessor = $this->processingFileAccessor;

	// Mark the file as "currently processed" for the duration of the write.
	$accessor->setCurrentlyProcessedFileId($fileId);

	try {
		// Writing creates a new file or a new file version. The write fires
		// a 'postWrite' event that would normally queue the file again;
		// the processingFileAccessor marker set above takes care of that.
		$this->viewFactory->create($directory)->file_put_contents($name, $ocrContent);
	} finally {
		// Always reset the marker, even if the write failed.
		$accessor->setCurrentlyProcessedFileId(null);
	}
}
}
32 changes: 32 additions & 0 deletions lib/Exception/OcrResultEmptyException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

declare(strict_types=1);

/**
* @copyright Copyright (c) 2023 Robin Windey <[email protected]>
*
* @license GNU AGPL version 3 or any later version
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

namespace OCA\WorkflowOcr\Exception;

use Exception;

/**
 * Exception type for the case that an OCR run produced no output.
 */
class OcrResultEmptyException extends Exception {
	/**
	 * Delegates to the base Exception constructor instead of assigning the
	 * message property directly, so that code and previous exception are
	 * handled by the standard mechanism.
	 *
	 * @param string $message Human readable description of the failure
	 * @param int $code Optional error code (defaults to 0)
	 * @param \Throwable|null $previous Optional causing exception, for chaining
	 */
	public function __construct(string $message, int $code = 0, ?\Throwable $previous = null) {
		parent::__construct($message, $code, $previous);
	}
}
2 changes: 1 addition & 1 deletion lib/Migration/Version2404Date20220903071748.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ private function getDatasetsToMigrate() : array {
$workflowSettings = json_decode($row['operation'], true);
$foundMapping = false;
$newLangArr = [];
if (!array_key_exists('languages', $workflowSettings)) {
if (!$workflowSettings || !array_key_exists('languages', $workflowSettings)) {
continue;
}
$languagesArr = $workflowSettings['languages'];
Expand Down
3 changes: 2 additions & 1 deletion lib/OcrProcessors/OcrMyPdfBasedProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

use Cocur\Chain\Chain;
use OCA\WorkflowOcr\Exception\OcrNotPossibleException;
use OCA\WorkflowOcr\Exception\OcrResultEmptyException;
use OCA\WorkflowOcr\Helper\ISidecarFileAccessor;
use OCA\WorkflowOcr\Model\GlobalSettings;
use OCA\WorkflowOcr\Model\WorkflowSettings;
Expand Down Expand Up @@ -86,7 +87,7 @@ public function ocrFile(File $file, WorkflowSettings $settings, GlobalSettings $
$ocrFileContent = $this->command->getOutput();

if (!$ocrFileContent) {
throw new OcrNotPossibleException('OCRmyPDF did not produce any output for file ' . $file->getPath());
throw new OcrResultEmptyException('OCRmyPDF did not produce any output for file ' . $file->getPath());
}

$recognizedText = $this->sidecarFileAccessor->getSidecarFileContent();
Expand Down
11 changes: 6 additions & 5 deletions lib/Service/IOcrService.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,19 @@

use OCA\WorkflowOcr\Model\WorkflowSettings;
use OCA\WorkflowOcr\OcrProcessors\OcrProcessorResult;
use OCP\Files\File;

interface IOcrService {
/**
 * Processes OCR on the given file.
 *
 * @param File $file The file to be processed
 * @param WorkflowSettings $settings The settings to be used for processing
 *
 * @throws \OCA\WorkflowOcr\Exception\OcrNotPossibleException
 * @throws \OCA\WorkflowOcr\Exception\OcrProcessorNotFoundException
 * @throws \OCA\WorkflowOcr\Exception\OcrResultEmptyException
 * @throws \InvalidArgumentException
 */
public function ocrFile(File $file, WorkflowSettings $settings) : OcrProcessorResult;
/**
 * Processes OCR on the given file. Creates a new file version and emits appropriate events.
 *
 * @param int $fileId The id of the file to be processed
 * @param string $uid The id of the user who has access to this file
 * @param WorkflowSettings $settings The settings to be used for processing
 *
 * NOTE(review): the thrown exceptions are presumably the same as listed
 * for ocrFile - confirm against the implementing service.
 */
public function runOcrProcess(int $fileId, string $uid, WorkflowSettings $settings) : void;
}
Loading

0 comments on commit 1b6e814

Please sign in to comment.