From 135492301a30d47f6f33df69f1b8c89246ffd0b5 Mon Sep 17 00:00:00 2001 From: Vlad Byndych Date: Fri, 29 Sep 2023 17:10:20 +0200 Subject: [PATCH 1/2] feat: ontology import/export for neo4j. --- helpers/data/class.GenerisAdapterRdf.php | 2 +- migrations/Version202309111518342234_tao.php | 52 ++++++++++++++ scripts/tools/MigrateSqlToNeo4j.php | 73 ++++++++++++-------- scripts/update/OntologyUpdater.php | 51 +++++++++----- 4 files changed, 129 insertions(+), 49 deletions(-) create mode 100644 migrations/Version202309111518342234_tao.php diff --git a/helpers/data/class.GenerisAdapterRdf.php b/helpers/data/class.GenerisAdapterRdf.php index 154a4d465f..0e8efd3832 100644 --- a/helpers/data/class.GenerisAdapterRdf.php +++ b/helpers/data/class.GenerisAdapterRdf.php @@ -145,7 +145,7 @@ private function addResource(Graph $graph, core_kernel_classes_Resource $resourc ) { continue; } - $graph->add($triple->subject, $triple->predicate, $triple->object); + $graph->addResource($triple->subject, $triple->predicate, $triple->object); } else { if ($this->isSerializedFile($triple->object)) { continue; diff --git a/migrations/Version202309111518342234_tao.php b/migrations/Version202309111518342234_tao.php new file mode 100644 index 0000000000..6b5116bcef --- /dev/null +++ b/migrations/Version202309111518342234_tao.php @@ -0,0 +1,52 @@ +addReport( + $this->propagate(new SyncModels())([]) + ); + } + + public function down(Schema $schema): void + { + $this->throwIrreversibleMigrationException( + 'The models should be updated via `SyncModels` script after reverting their RDF definitions.' 
+ ); + } +} diff --git a/scripts/tools/MigrateSqlToNeo4j.php b/scripts/tools/MigrateSqlToNeo4j.php index 3179ec5643..595faf9c90 100644 --- a/scripts/tools/MigrateSqlToNeo4j.php +++ b/scripts/tools/MigrateSqlToNeo4j.php @@ -32,6 +32,7 @@ use oat\oatbox\extension\script\ScriptAction; use oat\oatbox\reporting\Report; use oat\tao\model\TaoOntology; +use WikibaseSolutions\CypherDSL\Query; /** * php -dmemory_limit=1G index.php 'oat\tao\scripts\tools\MigrateSqlToNeo4j' -u -i -s 10000 -n 10000 -vvv @@ -242,12 +243,12 @@ public function extractDataFromSqlStorage(int $chunkSize): \Generator */ public function loadNTripleToNeo4j($neo4j, string $nTriple, int $neo4jChunkSize): void { - $nTriple = $this->escapeTriple($nTriple); - - $result = $neo4j->run(<<run( + << $nTriple] ); $responseMessage = $result->first(); @@ -263,31 +264,6 @@ public function loadNTripleToNeo4j($neo4j, string $nTriple, int $neo4jChunkSize) $this->logInfo('Chunk of triples successfully loaded.'); } - public function escapeTriple(string $nTriple): string - { - $escapeCharacters = [ - '\\\\' => '\\\\\\\\', //Escape double slash - '\"' => '\\\\"', // Escaped slash in escaped double quote - '\n' => '\\\\n', // Escaped slash in EOL - '\r' => '\\\\r', // Escaped slash in carriage return - '\t' => '\\\\t', // Escaped slash in horizontal tab - "'" => "\'", //Escape single quote - ]; - - $escapeList = []; - foreach ($escapeCharacters as $needle => $replacement) { - if (strpos($nTriple, $needle) !== false) { - $escapeList[$needle] = $replacement; - } - } - - if (!empty($escapeList)) { - $nTriple = str_replace(array_keys($escapeList), array_values($escapeList), $nTriple); - } - - return $nTriple; - } - protected function provideOptions(): array { return [ @@ -354,10 +330,47 @@ protected function run(): Report foreach ($nTripleList as $nTriple) { $this->loadNTripleToNeo4j($neo4j, $nTriple, $neo4jChunkSize); } + + $this->addSystemLabel($neo4j, $sqlChunkSize, $neo4jChunkSize); } catch (\Throwable $e) { return 
Report::createError($e->getMessage()); } return Report::createSuccess('Data transfer finished successfully.'); } + + private function addSystemLabel($neo4j, int $sqlChunkSize, int $neo4jChunkSize) + { + $sql = $this->getSqlAdapter(); + + /** @var \Doctrine\DBAL\ForwardCompatibility\Result $idResult */ + $result = $sql->query(<<<'SQL' + SELECT subject + FROM statements + WHERE modelid <> 1 + GROUP BY subject; +SQL); + + $subjectList = []; + while ($r = $result->fetchColumn()) { + $subjectList[] = $r; + + if (count($subjectList) >= $neo4jChunkSize) { + $systemNode = Query::node('Resource'); + $query = Query::new()->match($systemNode) + ->where($systemNode->property('uri')->in($subjectList)) + ->set($systemNode->labeled('System')); + $neo4j->runStatement($query); + $subjectList = []; + } + } + + if (!empty($subjectList)) { + $systemNode = Query::node('Resource'); + $query = Query::new()->match($systemNode) + ->where($systemNode->property('uri')->in($subjectList)) + ->set($systemNode->labeled('System')); + $neo4j->run($query->build()); + } + } } diff --git a/scripts/update/OntologyUpdater.php b/scripts/update/OntologyUpdater.php index 62d996ac83..ee757ca3bd 100644 --- a/scripts/update/OntologyUpdater.php +++ b/scripts/update/OntologyUpdater.php @@ -23,13 +23,11 @@ namespace oat\tao\scripts\update; use AppendIterator; -use oat\generis\model\kernel\persistence\file\FileModel; -use oat\generis\model\data\ModelManager; -use helpers_RdfDiff; -use core_kernel_persistence_smoothsql_SmoothModel; -use common_persistence_SqlPersistence; use common_ext_ExtensionsManager; -use core_kernel_persistence_smoothsql_SmoothIterator; +use helpers_RdfDiff; +use oat\generis\model\data\Model; +use oat\generis\model\data\ModelManager; +use oat\generis\model\kernel\persistence\file\FileModel; use oat\tao\model\extension\ExtensionModel; class OntologyUpdater @@ -37,20 +35,11 @@ class OntologyUpdater public static function syncModels() { $currentModel = ModelManager::getModel(); - $modelIds = 
array_diff($currentModel->getReadableModels(), ['1']); - - $persistence = common_persistence_SqlPersistence::getPersistence('default'); - $smoothIterator = new core_kernel_persistence_smoothsql_SmoothIterator($persistence, $modelIds); - - $nominalModel = new AppendIterator(); - foreach (common_ext_ExtensionsManager::singleton()->getInstalledExtensions() as $ext) { - $nominalModel->append(new ExtensionModel($ext)); - } - $langModel = \tao_models_classes_LanguageService::singleton()->getLanguageDefinition(); - $nominalModel->append($langModel); + $existingTriples = self::getCurrentTriples($currentModel); + $nominalTriples = self::getNominalTriples(); - $diff = helpers_RdfDiff::create($smoothIterator, $nominalModel); + $diff = helpers_RdfDiff::create($existingTriples, $nominalTriples); self::logDiff($diff); $diff->applyTo($currentModel); @@ -82,4 +71,30 @@ protected static function logDiff(\helpers_RdfDiff $diff) FileModel::toFile($path . DIRECTORY_SEPARATOR . 'add.rdf', $diff->getTriplesToAdd()); FileModel::toFile($path . DIRECTORY_SEPARATOR . 
'remove.rdf', $diff->getTriplesToRemove()); } + + public static function getNominalTriples(): \Traversable + { + $nominalModel = new AppendIterator(); + foreach (common_ext_ExtensionsManager::singleton()->getInstalledExtensions() as $ext) { + $nominalModel->append(new ExtensionModel($ext)); + } + $langModel = \tao_models_classes_LanguageService::singleton()->getLanguageDefinition(); + $nominalModel->append($langModel); + return $nominalModel; + } + + public static function getCurrentTriples(Model $currentModel): \Traversable + { + return new \CallbackFilterIterator( + $currentModel->getRdfInterface()->getIterator(), + function (\core_kernel_classes_Triple $item) { + $isAutomaticIncludeRole = $item->subject === 'http://www.tao.lu/Ontologies/TAO.rdf#GlobalManagerRole' + && $item->predicate === 'http://www.tao.lu/Ontologies/generis.rdf#includesRole'; + $isGrantAccess = $item->predicate === 'http://www.tao.lu/Ontologies/taoFuncACL.rdf#GrantAccess'; + + + return !$isGrantAccess && !$isAutomaticIncludeRole; + } + ); + } } From ed38ea67c69a078b6134dcdb5c67a9c8a82dca17 Mon Sep 17 00:00:00 2001 From: Vlad Byndych Date: Fri, 13 Oct 2023 19:26:57 +0200 Subject: [PATCH 2/2] chore: introduced constants and added comments for better readability. 
--- scripts/tools/MigrateSqlToNeo4j.php | 5 +++-- scripts/update/OntologyUpdater.php | 13 ++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/tools/MigrateSqlToNeo4j.php b/scripts/tools/MigrateSqlToNeo4j.php index 595faf9c90..328717dbb7 100644 --- a/scripts/tools/MigrateSqlToNeo4j.php +++ b/scripts/tools/MigrateSqlToNeo4j.php @@ -342,12 +342,13 @@ protected function run(): Report private function addSystemLabel($neo4j, int $sqlChunkSize, int $neo4jChunkSize) { $sql = $this->getSqlAdapter(); + $nonSystemModelId = \core_kernel_persistence_smoothsql_SmoothModel::DEFAULT_WRITABLE_MODEL; /** @var \Doctrine\DBAL\ForwardCompatibility\Result $idResult */ - $result = $sql->query(<<<'SQL' + $result = $sql->query(<< 1 + WHERE modelid <> {$nonSystemModelId} GROUP BY subject; SQL); diff --git a/scripts/update/OntologyUpdater.php b/scripts/update/OntologyUpdater.php index ee757ca3bd..be931adc51 100644 --- a/scripts/update/OntologyUpdater.php +++ b/scripts/update/OntologyUpdater.php @@ -27,8 +27,10 @@ use helpers_RdfDiff; use oat\generis\model\data\Model; use oat\generis\model\data\ModelManager; +use oat\generis\model\GenerisRdf; use oat\generis\model\kernel\persistence\file\FileModel; use oat\tao\model\extension\ExtensionModel; +use oat\tao\model\user\TaoRoles; class OntologyUpdater { @@ -88,10 +90,15 @@ public static function getCurrentTriples(Model $currentModel): \Traversable return new \CallbackFilterIterator( $currentModel->getRdfInterface()->getIterator(), function (\core_kernel_classes_Triple $item) { - $isAutomaticIncludeRole = $item->subject === 'http://www.tao.lu/Ontologies/TAO.rdf#GlobalManagerRole' - && $item->predicate === 'http://www.tao.lu/Ontologies/generis.rdf#includesRole'; - $isGrantAccess = $item->predicate === 'http://www.tao.lu/Ontologies/taoFuncACL.rdf#GrantAccess'; + /** + * These includes are generated by a script and created in non-system space, so we ignore them.
+ * @see \tao_install_ExtensionInstaller::installManagementRole + */ + $isAutomaticIncludeRole = $item->subject === TaoRoles::GLOBAL_MANAGER + && $item->predicate === GenerisRdf::PROPERTY_ROLE_INCLUDESROLE; + // The GrantAccess field is added to entities in non-system space, so it should also be ignored for now. + $isGrantAccess = $item->predicate === 'http://www.tao.lu/Ontologies/taoFuncACL.rdf#GrantAccess'; return !$isGrantAccess && !$isAutomaticIncludeRole; }