Skip to content

Commit

Permalink
Merge pull request #1 from esmero/ISSUE-3
Browse files Browse the repository at this point in the history
Issue 3
  • Loading branch information
giancarlobi authored Nov 23, 2020
2 parents 93896a6 + 5c6fa9f commit 7b6d6ce
Show file tree
Hide file tree
Showing 8 changed files with 521 additions and 47 deletions.
47 changes: 47 additions & 0 deletions config/schema/strawberry_runners.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,44 @@ strawberryfield_runners.strawberry_runners_postprocessor.*:
# Config schema for the "binary" Strawberry Runners post processor settings.
strawberryfield_runners.strawberry_runners_postprocessor.binary:
  type: config_object
  label: 'Strawberry Runners Post Processor Config Entity Binary specific config'
  mapping:
    source_type:
      type: string
      label: 'The type of Source Data this Processor works on'
    # Stored as a single string; may hold several comma separated values.
    ado_type:
      type: string
      label: 'DO type(s) to limit this Processor to'
    jsonkey:
      type: sequence
      label: 'The JSON key(s) containing the desired Source File(s)'
      sequence:
        - type: string
    # Stored as a single string; may hold several comma separated values.
    mime_type:
      type: string
      label: 'Mimetype(s) to limit this Processor to'
    path:
      type: string
      label: 'The path for the binary to execute'
    arguments:
      type: string
      label: 'Any additional argument your executable binary requires'
    output_type:
      type: string
      label: 'The expected and desired output of this processor'
    output_destination:
      type: sequence
      label: 'Where and how the output will be used'
      sequence:
        - type: string
    timeout:
      type: integer
      label: 'Timeout in seconds for this process'
    weight:
      type: integer
      label: 'Order of execution in the global chain'
strawberryfield_runners.strawberry_runners_postprocessor.ocr:
type: config_object
label: 'Strawberry Runners Post Processor Config Entity OCR specific config'
mapping:
source_type:
type: string
Expand All @@ -49,6 +87,15 @@ strawberryfield_runners.strawberry_runners_postprocessor.binary:
arguments:
type: string
label: 'Any additional argument your executable binary requires'
tesseract_arguments:
type: string
label: 'Any additional argument your executable binary requires'
path:
type: string
label: 'The path for he binary to execute'
tesseract_path:
type: string
label: 'The path for he binary to execute'
output_type:
type: string
label: 'The expected and desired output of this processor'
Expand Down
9 changes: 8 additions & 1 deletion src/Annotation/StrawberryRunnersPostProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ class StrawberryRunnersPostProcessor extends Plugin {
*/
public $input_property;

/**
* The Object property that contains the additional data needed by the Processor's ::run() method.
*
* @var string
*
*/
public $input_arguments;

/**
* Processing stage: can be Entity PreSave or Index time search_api
Expand All @@ -64,4 +71,4 @@ class StrawberryRunnersPostProcessor extends Plugin {
*/
public $when = StrawberryRunnersPostProcessor::PRESAVE;

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
use Drupal\Core\Entity\EntityTypeManagerInterface;
use Drupal\Core\Session\AccountInterface;
use Drupal\strawberryfield\Event\StrawberryfieldCrudEvent;
use Drupal\strawberryfield\EventSubscriber\StrawberryfieldEventPresaveSubscriber;
use Drupal\Core\StringTranslation\StringTranslationTrait;
use Drupal\Core\StringTranslation\TranslationInterface;
use Drupal\Core\Messenger\MessengerInterface;
Expand All @@ -14,14 +13,14 @@
use Drupal\Component\Utility\Unicode;
use Drupal\file\FileInterface;
use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface;
use Drupal\Core\StreamWrapper\StreamWrapperInterface;
use Drupal\Core\File\FileSystemInterface;
use Drupal\strawberry_runners\Plugin\StrawberryRunnersPostProcessorPluginManager;
use Drupal\strawberryfield\EventSubscriber\StrawberryfieldEventSaveSubscriber;

/**
* Event subscriber for SBF bearing entity json process event.
*/
class StrawberryRunnersEventPreSavePostProcessingSubscriber extends StrawberryfieldEventPresaveSubscriber {
class StrawberryRunnersEventSavePostProcessingSubscriber extends StrawberryfieldEventSaveSubscriber {


use StringTranslationTrait;
Expand Down Expand Up @@ -150,7 +149,7 @@ public function __construct(
* @throws \Drupal\Component\Plugin\Exception\PluginException
* @throws \Drupal\Component\Plugin\Exception\PluginNotFoundException
*/
public function onEntityPresave(StrawberryfieldCrudEvent $event) {
public function onEntitySave(StrawberryfieldCrudEvent $event) {

/* @var $plugin_config_entities \Drupal\strawberry_runners\Entity\strawberryRunnerPostprocessorEntity[] */
$plugin_config_entities = $this->entityTypeManager->getListBuilder('strawberry_runners_postprocessor')->load();
Expand Down Expand Up @@ -232,9 +231,9 @@ public function onEntityPresave(StrawberryfieldCrudEvent $event) {
/** @var $itemfield \Drupal\strawberryfield\Plugin\Field\FieldType\StrawberryFieldItem */
$flatvalues = (array) $itemfield->provideFlatten();
// Run first on entity:files
$sbf_type = NULL;
$sbf_type = [];
if (isset($flatvalues['type'])) {
$sbf_type = $flatvalues['type'];
$sbf_type = (array) $flatvalues['type'];
}
foreach ($askeymap as $jsonkey => $activePlugins) {
if (isset($flatvalues[$jsonkey])) {
Expand All @@ -243,12 +242,16 @@ public function onEntityPresave(StrawberryfieldCrudEvent $event) {

foreach($activePlugins as $activePluginId => $config) {
$valid_mimes = [];
if (empty($config['ado_type']) || in_array($config['ado_type'] , $sbf_type)) {
//@TODO also split $config['ado_type'] so we can check
$valid_ado_type = [];
$valid_ado_type = explode(',', $config['ado_type']);
if (empty($config['ado_type']) || count(array_intersect($valid_ado_type , $sbf_type)) > 0) {
$valid_mimes = explode(',', $config['mime_type']);
if (empty($valid_mimes) || (isset($asstructure["dr:mimetype"]) && in_array($asstructure["dr:mimetype"], $valid_mimes))) {
$data = new \stdClass();
$data->fid = $asstructure['dr:fid'];
$data->nid = $entity->id();
$data->nuuid = $entity->uuid();
// We are passing also the full file metadata.
// This gives us an advantage so we can reuse
// Sequence IDs, PDF pages, etc and act on them
Expand All @@ -260,6 +263,16 @@ public function onEntityPresave(StrawberryfieldCrudEvent $event) {
// $activePluginId? That would allow us to skip reprocessing
// Easier?
$data->metadata = $asstructure;

// @TODO how to force?
// It can be a state key, a valuekey, or a JSON passed property.
// The issue with a JSON passed property is that we can no longer
// modify it here (the Entity is already saved).
// So we should really have a non-Metadata method for this,
// or we can have a preSave Subscriber that reads the property,
// sets the state and then removes it before saving.

$data->force = FALSE;
$data->plugin_config_entity_id = $activePluginId;
// See https://github.com/esmero/strawberry_runners/issues/10
// Since the destination Queue can be a modal thing
Expand Down
66 changes: 42 additions & 24 deletions src/Plugin/QueueWorker/IndexPostProcessorQueueWorker.php
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,13 @@ public function processItem($data) {

$processor_instance = $this->getProcessorPlugin($data->plugin_config_entity_id);

if (!isset($data->fid) || $data->fid == NULL || !isset($data->nid) || $data->nid == NULL) {
if (!isset($data->fid) || $data->fid == NULL || !isset($data->nid) || $data->nid == NULL || !is_array($data->metadata)) {
return;
}
$file = $this->entityTypeManager->getStorage('file')->load($data->fid);

if ($file === NULL) {
if ($file === NULL || !isset($data->metadata['checksum'])) {
error_log('Sorry the file does not exist or has no checksum yet. We really need the checksum');
return;
}
//@TODO should we wrap this around a try catch?
Expand All @@ -188,37 +189,54 @@ public function processItem($data) {

// Skip file if element is found in key_value collection.
$processed_data = $this->keyValue->get($keyvalue_collection)->get($key);

if (empty($processed_data)) {
error_log('Is this already in our temp keyValue?');
error_log(empty($processed_data));
//@TODO allow a force in case of a corrupted key value? Partial output?
// Extraneous weird data?
if (true || empty($processed_data) ||
$data->force == TRUE ||
(!isset($processed_data->checksum) ||
empty($processed_data->checksum) ||
$processed_data->checksum != $data->metadata['checksum'])) {
// Extract file and save it in key_value collection.
$io = new \stdClass();
$input = new \stdClass();
$input->filepath = $filelocation;

$input->page_number = 1;
// The Node UUID
$input->nuuid = $data->nuuid;
// All the rest of the associated Metadata in an as:structure
$input->metadata = $data->metadata;
$io->input = $input;
$io->output = NULL;
//@TODO implement the TEST and BENCHMARK logic here
// RUN should return exit codes so we can know if something failed
// And totally discard indexing.
$extracted_data = $processor_instance->run($io, StrawberryRunnersPostProcessorPluginInterface::PROCESS);
error_log ('processing just run');
error_log($io->ouput);
error_log('writing to keyvalue');
error_log($key);
$this->keyValue->get($keyvalue_collection)->set($key, $io->output);
}

// Get which indexes have our StrawberryfieldFlavorDatasource enabled!
$indexes = StrawberryfieldFlavorDatasource::getValidIndexes();

$item_ids = [];
if (is_a($entity, TranslatableInterface::class)) {
$translations = $entity->getTranslationLanguages();
foreach ($translations as $translation_id => $translation) {
$item_ids[] = $entity->id() . ':'.'1' .':'.$translation_id.':'.$file->uuid().':'.$data->plugin_config_entity_id;
$toindex = new \stdClass();
$toindex->fulltext = $io->output;
$toindex->checksum = $data->metadata['checksum'];
error_log(var_export($toindex,true));
$this->keyValue->get($keyvalue_collection)->set($key, $toindex);

// Get which indexes have our StrawberryfieldFlavorDatasource enabled!
$indexes = StrawberryfieldFlavorDatasource::getValidIndexes();

$item_ids = [];
if (is_a($entity, TranslatableInterface::class)) {
$translations = $entity->getTranslationLanguages();
foreach ($translations as $translation_id => $translation) {
$item_ids[] = $entity->id() . ':'.'1' .':'.$translation_id.':'.$file->uuid().':'.$data->plugin_config_entity_id;
}
}
error_log(var_export($item_ids,true));
$datasource_id = 'strawberryfield_flavor_datasource';
foreach ($indexes as $index) {
$index->trackItemsInserted($datasource_id, $item_ids);
}
}
error_log(var_export($item_ids,true));
$datasource_id = 'strawberryfield_flavor_datasource';
foreach ($indexes as $index) {
$index->trackItemsUpdated($datasource_id, $item_ids);
}
}
catch (\Exception $exception) {
Expand Down Expand Up @@ -252,7 +270,7 @@ private function ensureFileAvailability(FileInterface $file) {
// Check first if the file is already around in temp?
// @TODO can we be sure it's the same one? Ideas?
if (is_readable(
$this->fileSystem->realpath(
$this->fileSystem->realpath(
'temporary://sbr_' . $cache_key . '_' . basename($uri)
)
)) {
Expand Down Expand Up @@ -306,4 +324,4 @@ public function getRealpath($uri) {
}
}

}
}
Loading

0 comments on commit 7b6d6ce

Please sign in to comment.