From fc3ab7bb012556a07b549c66390a600ed71fc93f Mon Sep 17 00:00:00 2001 From: Konrad Abicht Date: Tue, 16 Apr 2024 16:17:05 +0200 Subject: [PATCH] refined existing extractors, re-added BioPortal extractor --- README.md | 16 +- scripts/bin/bootstrap.php | 2 +- scripts/bin/renew_index.php | 2 + scripts/src/Cache.php | 32 ++- scripts/src/Extractor/AbstractExtractor.php | 121 ++++++---- scripts/src/Extractor/BioPortal.php | 218 ++++++++++++++++++ scripts/src/Extractor/DBpediaArchivo.php | 16 +- .../src/Extractor/LinkedOpenVocabularies.php | 9 +- .../src/Extractor/OntologyLookupService.php | 12 +- scripts/src/IndexEntry.php | 4 +- 10 files changed, 352 insertions(+), 80 deletions(-) create mode 100644 scripts/src/Extractor/BioPortal.php diff --git a/README.md b/README.md index 561972f..207e90c 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,11 @@ You can find the index file here: [**index.csv**](./index.csv) (**3000+** ontolo ### Requirements and rules for an ontology-entry in the index -An RDF ontology/vocabulary is part of the index if it meets the following requirements: +An RDF ontology / vocabulary is part of the index if it meets the following criteria: * non-empty, valid title -* non-empty, valid URI +* non-empty, valid IRI * at least one valid URL to a RDF file + * at least one RDFS/OWL class is defined or at least one instance of owl:Ontology is found If an entry is part of multiple sources (e.g. LOV and DBpedia Archivo), the one which appears first is taken. 
@@ -150,11 +151,7 @@ But if you wanna help out with one of the topics, feel free to open an issue or * [x] license * [x] authors + contributors * [x] project page / homepage - * [ ] data source url -* [ ] check prior versions of an ontology to avoid adding the same ontology just with different versions - * [ ] http vs https - * [ ] / vs # at the end -* [ ] mark entries if they contain SKOS entries + * [x] data source url ### Version 0.2 @@ -164,7 +161,6 @@ But if you wanna help out with one of the topics, feel free to open an issue or * [ ] https://obofoundry.org/ * [ ] http://www.oegov.us/ * [ ] http://ontologydesignpatterns.org/wiki/Main\_Page - * [ ] https://obofoundry.org/ * [ ] https://github.com/linkeddata/ontology-archiver * [ ] crawl Github repositories tagged with "ontology" etc. * [ ] harmonize datetime information for latest access (all UTC?) @@ -172,6 +168,10 @@ But if you wanna help out with one of the topics, feel free to open an issue or * [ ] add a way to manually provide entries via Github * [ ] Ping service: on update call a list of URLs to let them know that there was a change * [ ] generate statistics for each service read to build index.csv (contains number of entries etc.) 
+* [ ] check prior versions of an ontology to avoid adding the same ontology just with different versions + * [ ] http vs https + * [ ] / vs # at the end +* [ ] mark entries if they contain SKOS entries ## License diff --git a/scripts/bin/bootstrap.php b/scripts/bin/bootstrap.php index 9a803c4..3de6d69 100644 --- a/scripts/bin/bootstrap.php +++ b/scripts/bin/bootstrap.php @@ -23,7 +23,7 @@ 'http://xmlns.com/foaf/0.1/name', ]; -$bioPortalApiKeyFile = ROOT_DIR_PATH.DIRECTORY_SEPARATOR.'scripts'.DIRECTORY_SEPARATOR.'.api_key_bioportal.php'; +define('BIOPORTAL_API_KEY_FILE', ROOT_DIR_PATH.DIRECTORY_SEPARATOR.'scripts'.DIRECTORY_SEPARATOR.'.api_key_bioportal.php'); // include vendor libraries require_once SCRIPTS_DIR_PATH.'vendor'.DIRECTORY_SEPARATOR.'autoload.php'; diff --git a/scripts/bin/renew_index.php b/scripts/bin/renew_index.php index f0bb8f1..1b48bf6 100644 --- a/scripts/bin/renew_index.php +++ b/scripts/bin/renew_index.php @@ -21,6 +21,7 @@ (new LinkedOpenVocabularies($cache, $dataFactory, $temporaryIndex))->run(); (new DBpediaArchivo($cache, $dataFactory, $temporaryIndex))->run(); (new OntologyLookupService($cache, $dataFactory, $temporaryIndex))->run(); +(new BioPortal($cache, $dataFactory, $temporaryIndex))->run(); // finalize temporary index and write index.csv (new MergeInManuallyMaintainedMetadata($cache, $dataFactory, $temporaryIndex))->run(); diff --git a/scripts/src/Cache.php b/scripts/src/Cache.php index bfe644d..d00d6b9 100644 --- a/scripts/src/Cache.php +++ b/scripts/src/Cache.php @@ -16,6 +16,8 @@ class Cache */ private array $caches = []; + private string $filesFolder = __DIR__.'/../var/downloaded_rdf_files/'; + private function getCacheInstance(string $namespace): AbstractAdapter { if (false === isset($this->caches[$namespace])) { @@ -25,6 +27,26 @@ private function getCacheInstance(string $namespace): AbstractAdapter return $this->caches[$namespace]; } + private function createSimplifiedFilename(string $fileUrl): string + { + return 
preg_replace('/[^a-z0-9\-_]/ism', '_', $fileUrl); + } + + /** + * @return non-empty-string + */ + public function getCachedFilePathForFileUrl(string $fileUrl): string + { + $fileRes = $this->getLocalFileResourceForFileUrl($fileUrl); + + if (is_resource($fileRes)) { + // generate simplified filename for local storage + return $this->filesFolder.$this->createSimplifiedFilename($fileUrl); + } else { + throw new Exception('Got no file resource for '.$fileUrl); + } + } + /** * @return resource|false Return value of fopen(..., 'r') * @@ -32,12 +54,8 @@ private function getCacheInstance(string $namespace): AbstractAdapter */ public function getLocalFileResourceForFileUrl(string $fileUrl) { - $filesFolder = __DIR__.'/../var/downloaded_rdf_files/'; - - // generate simplified filename for local storage - $filename = preg_replace('/[^a-z0-9\-_]/ism', '_', $fileUrl); - - $filepath = $filesFolder.$filename; + $filename = $this->createSimplifiedFilename($fileUrl); + $filepath = $this->filesFolder.$filename; echo PHP_EOL.$fileUrl.' 
>> '.$filename; @@ -80,7 +98,7 @@ public function getLocalFileResourceForFileUrl(string $fileUrl) public function sendCachedRequest(string $url, string $namespace): string { $cache = $this->getCacheInstance($namespace); - $key = (string) preg_replace('/[\W]/', '_', $url); + $key = $this->createSimplifiedFilename($url); // ask cache for entry // if there isn't one, run HTTP request and return response content diff --git a/scripts/src/Extractor/AbstractExtractor.php b/scripts/src/Extractor/AbstractExtractor.php index 7a4a3e9..c95aaff 100644 --- a/scripts/src/Extractor/AbstractExtractor.php +++ b/scripts/src/Extractor/AbstractExtractor.php @@ -55,7 +55,13 @@ abstract public function run(): void; public function addFurtherMetadata(IndexEntry $indexEntry, Graph $graph): void { // short description / summary - $properties = ['skos:definition', 'dc11:description', 'dc:description', 'rdfs:comment']; + $properties = [ + 'skos:definition', + 'dc11:description', + 'dc:description', + 'rdfs:comment', + 'schema:description', + ]; $valuesString = $this->getLiteralValuesAsString($graph, $properties, (string) $indexEntry->getOntologyIri()); $valuesString = $this->cleanString($valuesString); if (false === isEmpty($valuesString)) { @@ -64,7 +70,7 @@ public function addFurtherMetadata(IndexEntry $indexEntry, Graph $graph): void // license $valuesString = null; - foreach (['dc:license', 'dc11:rights'] as $prop) { + foreach (['dc:license', 'dc11:rights', 'schema:license'] as $prop) { $valuesString = $this->getLiteralValuesAsString($graph, [$prop], (string) $indexEntry->getOntologyIri(), ' ', true); $valuesString = $this->getAlignedLicenseInformation($valuesString); @@ -93,7 +99,7 @@ public function addFurtherMetadata(IndexEntry $indexEntry, Graph $graph): void } // project page / homepage - $properties = ['foaf:homepage', 'schema:WebSite', 'schema:url']; + $properties = ['foaf:homepage', 'schema:WebSite', 'schema:url', 'rdfs:seeAlso']; $valuesString = 
$this->getLiteralValuesAsString($graph, $properties, (string) $indexEntry->getOntologyIri()); $valuesString = $this->cleanString($valuesString); if (false === isEmpty($valuesString)) { @@ -103,7 +109,7 @@ public function addFurtherMetadata(IndexEntry $indexEntry, Graph $graph): void /* * latest access (latest file) */ - $properties = ['dc:modified', 'dc11:modified']; + $properties = ['dc:modified', 'dc11:modified', 'schema:dateModified']; foreach ($properties as $prop) { $values = $graph->resource($indexEntry->getOntologyIri())->allLiterals($prop); @@ -111,7 +117,7 @@ public function addFurtherMetadata(IndexEntry $indexEntry, Graph $graph): void $values = array_map(function ($value) { if ($value instanceof DateTime || $value instanceof Date) { return $value->format('Y-m-d'); - } else { + } elseif(1 === preg_match('/[0-9]{4}\-[0-9]{2}\-[0-9]{2}/', $value->getValue())) { return $value->getValue(); } }, $values); @@ -194,34 +200,6 @@ protected function getLiteralValuesAsString( return $valuesString; } - /** - * @param non-empty-string $fileUrl - * - * @throws \Exception - */ - public function guessFormatOnFile(string $fileUrl): string|null - { - $fileHandle = $this->cache->getLocalFileResourceForFileUrl($fileUrl); - if (false === is_resource($fileHandle)) { - throw new Exception('Could not open related file for '.$fileUrl); - } - - $lengthInMb = 1024 * 100; - $str = (string) fread($fileHandle, $lengthInMb); - - fclose($fileHandle); - - $format = Format::guessFormat($str)?->getName() ?? null; - if (null == $format) { - // it only uses the first 1024 bytes, ... 
try with more bytes if (str_contains($str, 'cache->getLocalFileResourceForFileUrl($fileUrl); + } catch (Throwable $th) { + if ( + str_contains($th->getMessage(), 'HTTP/1.1 403 Forbidden') + || str_contains($th->getMessage(), 'HTTP/1.1 504 Gateway Time-out') + ) { + echo PHP_EOL.$th->getMessage(); + return null; + } else { + throw $th; + } + } + + if (false === is_resource($fileHandle)) { + throw new Exception('Could not open related file for '.$fileUrl); + } + + $lengthInMb = 1024 * 100; + $str = (string) fread($fileHandle, $lengthInMb); + + fclose($fileHandle); + + $format = Format::guessFormat($str)?->getName() ?? null; + if (null == $format) { + // it only uses the first 1024 bytes, ... try with more bytes + if (str_contains($str, 'dataFactory, $format) as $quad) { + foreach (Util::parse($fileHandle, $this->dataFactory) as $quad) { $list[] = $quad; if ($i++ > $maxAmountOfTriples) { break; @@ -364,18 +380,11 @@ protected function loadQuadsIntoEasyRdfGraph( || str_contains($th->getMessage(), 'on line') ) { echo PHP_EOL.' - quickRdfIo failed, trying rapper'.PHP_EOL; - /* - * use rapper command to read the RDF file and return nquads - */ - if (isEmpty($format)) { - // FYI: https://librdf.org/raptor/rapper.html - $format = '--guess'; - } else { - $format = '-i '.substr((string) $format, 0, 20); - } - $command = 'rapper '.$format.' -o ntriples '.$rdfFileUrl; + // build and execute command using system shell + $command = 'rapper --guess -o ntriples '.escapeshellarg($localFilePath); $nquads = (string) shell_exec($command); + // limit amount of entries $triples = explode(PHP_EOL, $nquads); $triples = array_slice($triples, 0, $maxAmountOfTriples); @@ -386,4 +395,16 @@ protected function loadQuadsIntoEasyRdfGraph( } } } + + /** + * Checks if ontology file contains elements of a certain type. 
+ */ + protected function ontologyFileContainsElementsOfCertainTypes(Graph $graph): bool + { + return + 0 < count($graph->allOfType('owl:Ontology')) + || 0 < count($graph->allOfType('owl:Class')) + || 0 < count($graph->allOfType('rdfs:Class')) + ; + } } diff --git a/scripts/src/Extractor/BioPortal.php b/scripts/src/Extractor/BioPortal.php new file mode 100644 index 0000000..62d6504 --- /dev/null +++ b/scripts/src/Extractor/BioPortal.php @@ -0,0 +1,218 @@ +apiKey = require BIOPORTAL_API_KEY_FILE; + } + + /** + * @throws \Exception + */ + public function run(): void + { + echo PHP_EOL; + echo '-------------------------------------------------'; + echo PHP_EOL; + echo 'BioPortal - Extraction started ...'; + echo PHP_EOL; + + foreach ($this->getOntologiesToProcess() as $ontology) { + $newEntry = $this->getPreparedIndexEntry(); + + // title + $newEntry->setOntologyTitle($ontology['name']); + + echo PHP_EOL; + echo '---------------------------------------------------------------------'; + echo PHP_EOL; + echo 'Next: '.$newEntry->getOntologyTitle(); + + // URI + // example: https://github.com/ncbo/ontologies_api/issues/138#issuecomment-2035077045 + $url = $ontology['links']['latest_submission'].'?include=all&display_context=false&display_links=false&apikey='; + $url .= $this->apiKey; + $content = $this->cache->sendCachedRequest($url, $this->namespace); + $arr = json_decode($content, true); + if (isEmpty($arr['uri'] ?? null)) { + echo PHP_EOL.' - IGNORED because latest submission is empty > '.$url.PHP_EOL; + continue; + } else { + $newEntry->setOntologyIri($arr['uri']); + } + + if ($this->temporaryIndex->hasEntry($newEntry->getOntologyIri())) { + echo PHP_EOL.'- entry already in temp. 
index, skipping'; + continue; + } + + // latest access (== latest_submission.released, but only year + month + day) + $released = new DateTime($arr['released']); + $newEntry->setLatestAccess($released->format('Y-m-d')); + + // link UI page + $newEntry->setSourcePage($ontology['links']['ui']); + + // get related RDF data + $uiContent = $this->cache->sendCachedRequest($ontology['links']['ui'], $this->namespace); + $regex = "/href='(https:\/\/data\.bioontology\.org\/ontologies\/[a-zA-Z\-_]+\/download)\?apikey=.*?&(download_format=rdf)/smi"; + preg_match($regex, $uiContent, $match); + // try RDF/XML link using the UI link (most reliable) + if (isset($match[1]) && isset($match[2])) { + $ontologyFile = $match[1].'?'.$match[2]; + $ontologyFileWithApiKey = $ontologyFile .'&apikey='.$this->apiKey; + $format = 'rdfxml'; + echo PHP_EOL.' - use ui link'; + } else { + // determine RDF file location, file handle, format and related Graph instance + $ontologyFile = $ontology['links']['download']; + $ontologyFileWithApiKey = $ontologyFile.'?apikey='.$this->apiKey; + $format = $this->guessFormatOnFile($ontologyFileWithApiKey); + + if (null == $format) { + echo PHP_EOL.' - unknown format'; + continue; + } else { + echo PHP_EOL.' 
- use download link'; + } + } + + if (in_array($ontologyFileWithApiKey, [ + // 'https://data.bioontology.org/ontologies/AURA/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + // 'https://data.bioontology.org/ontologies/NCBITAXON/download?apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + // 'https://data.bioontology.org/ontologies/CHEBI/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_LOINC_download_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/LOINC/download?apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // timeouted + // https___data_bioontology_org_ontologies_RDL_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/RDL/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_UPHENO_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/UPHENO/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_BIOMODELS_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/BIOMODELS/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_PR_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/PR/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_BERO_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/BERO/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // 
https___data_bioontology_org_ontologies_FOODON_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/FOODON/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_FMA_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/FMA/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_MESH_download_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/MESH/download?apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + + // https___data_bioontology_org_ontologies_NIFSTD_download_download_format_rdf_apikey_25488e1c-c440-4bf4-8c59-97e17f217522 + // 'https://data.bioontology.org/ontologies/NIFSTD/download?download_format=rdf&apikey=25488e1c-c440-4bf4-8c59-97e17f217522', + ])) { + echo PHP_EOL.'takes too long, will be ignored, but downloaded separately'; + continue; + } + + // set latest files based on format + if ('ntriples' == $format) { + $newEntry->setLatestNtFile($ontologyFile); + } elseif ('rdfxml' == $format) { + $newEntry->setLatestRdfXmlFile($ontologyFile); + } elseif ('turtle' == $format) { + $newEntry->setLatestTurtleFile($ontologyFile); + } else { + echo PHP_EOL.' - IGNORED: No valid RDF notation found ('. 
$format.') for '.$ontologyFile; + continue; + } + + // get file handle + $fileHandle = $this->cache->getLocalFileResourceForFileUrl($ontologyFileWithApiKey); + if (false === is_resource($fileHandle)) { + throw new Exception('Could not open related file for '.$ontologyFileWithApiKey); + } + + // get EasyRdf Graph instance + $localFilePath = $this->cache->getCachedFilePathForFileUrl($ontologyFileWithApiKey); + $graph = $this->loadQuadsIntoEasyRdfGraph($fileHandle, $localFilePath); + fclose($fileHandle); + + if ( + $this->ontologyFileContainsElementsOfCertainTypes($graph) + || 0 == count($graph->toRdfPhp()) + ) { + $this->addFurtherMetadata($newEntry, $graph); + $this->temporaryIndex->storeEntries([$newEntry]); + } else { + throw new Exception('File '.$localFilePath.' does not contain any ontology related instances'); + } + } + } + + public function getPreparedIndexEntry(): IndexEntry + { + return new IndexEntry('BioPortal', 'https://data.bioontology.org/documentation'); + } + + /** + * Get complete list of ontologies. + * + * @return array> + * + * @throws \Exception + */ + public function getOntologiesToProcess(): array + { + // ontology list + $url = $this->ontologyListUrl.$this->apiKey; + + $content = $this->cache->sendCachedRequest($url, $this->namespace); + $ontologies = json_decode($content, true); + + echo PHP_EOL.'loaded '.count($ontologies).' 
entries'.PHP_EOL; + + return $ontologies; + } +} diff --git a/scripts/src/Extractor/DBpediaArchivo.php b/scripts/src/Extractor/DBpediaArchivo.php index cdea147..c4fb48c 100644 --- a/scripts/src/Extractor/DBpediaArchivo.php +++ b/scripts/src/Extractor/DBpediaArchivo.php @@ -49,7 +49,9 @@ public function run(): void throw new Exception('Could not open related file for '.$indexEntry->getLatestNtFile()); } - $graph = $this->loadQuadsIntoEasyRdfGraph($fileHandle, $indexEntry->getLatestNtFile(), 'ntriples'); + $localFilePath = $this->cache->getCachedFilePathForFileUrl($indexEntry->getLatestNtFile()); + $graph = $this->loadQuadsIntoEasyRdfGraph($fileHandle, $localFilePath); + fclose($fileHandle); } catch (Exception $e) { if (str_contains($e->getMessage(), 'CURLE_OPERATION_TIMEOUTED')) { echo PHP_EOL.' - TIMEOUT, ignored'; @@ -62,10 +64,12 @@ public function run(): void } } - $this->addFurtherMetadata($indexEntry, $graph); - fclose($fileHandle); - - $this->temporaryIndex->storeEntries([$indexEntry]); + if ($this->ontologyFileContainsElementsOfCertainTypes($graph)) { + $this->addFurtherMetadata($indexEntry, $graph); + $this->temporaryIndex->storeEntries([$indexEntry]); + } else { + throw new Exception('File '.$localFilePath.' 
does not contain any ontology related instances'); + } } } @@ -101,7 +105,7 @@ public function getOntologiesToProcess(): array // info page + title/name of ontology preg_match('/\s*\n*(.*?)setSourcePage('https://archivo.dbpedia.org/'.$data[1]); + $newEntry->setSourcePage('https://archivo.dbpedia.org'.$data[1]); } if (isset($data[1]) && false === isEmpty($data[2])) { $newEntry->setOntologyTitle($this->cleanString($data[2])); diff --git a/scripts/src/Extractor/LinkedOpenVocabularies.php b/scripts/src/Extractor/LinkedOpenVocabularies.php index c57bf25..23de49e 100644 --- a/scripts/src/Extractor/LinkedOpenVocabularies.php +++ b/scripts/src/Extractor/LinkedOpenVocabularies.php @@ -100,7 +100,8 @@ public function run(): void } } - $ontologyGraph = $this->loadQuadsIntoEasyRdfGraph($fileHandle, $ontology->getLatestN3File(), 'n3'); + $localFilePath = $this->cache->getCachedFilePathForFileUrl($ontology->getLatestN3File()); + $ontologyGraph = $this->loadQuadsIntoEasyRdfGraph($fileHandle, $localFilePath); fclose($fileHandle); $this->addFurtherMetadata($ontology, $ontologyGraph); @@ -110,7 +111,11 @@ public function run(): void throw new Exception('No related dcat:distribution found.'); } - $this->temporaryIndex->storeEntries([$ontology]); + if ($this->ontologyFileContainsElementsOfCertainTypes($ontologyGraph)) { + $this->temporaryIndex->storeEntries([$ontology]); + } else { + throw new Exception('File '.$localFilePath.' 
does not contain any ontology related instances'); + } } } diff --git a/scripts/src/Extractor/OntologyLookupService.php b/scripts/src/Extractor/OntologyLookupService.php index 0f95a0e..121289d 100644 --- a/scripts/src/Extractor/OntologyLookupService.php +++ b/scripts/src/Extractor/OntologyLookupService.php @@ -128,7 +128,8 @@ public function run(): void } $format = $this->guessFormatOnFile($ontologyFileLocation); - $graph = $this->loadQuadsIntoEasyRdfGraph($fileHandle, $ontologyFileLocation, $format); + $localFilePath = $this->cache->getCachedFilePathForFileUrl($ontologyFileLocation); + $graph = $this->loadQuadsIntoEasyRdfGraph($fileHandle, $localFilePath); fclose($fileHandle); // if title is empty, try to load file and get it this way @@ -152,8 +153,6 @@ public function run(): void $newEntry->setOntologyTitle($ontology['config']['title']); } - $this->addFurtherMetadata($newEntry, $graph); - // set latest access $uploaded = new DateTime($ontology['updated'], new DateTimeZone('UTC')); $newEntry->setLatestAccess($uploaded->format('Y-m-d')); @@ -171,7 +170,12 @@ public function run(): void throw new Exception('Unknown file format ('.$format.') for '.$ontologyFileLocation); } - $this->temporaryIndex->storeEntries([$newEntry]); + if ($this->ontologyFileContainsElementsOfCertainTypes($graph)) { + $this->addFurtherMetadata($newEntry, $graph); + $this->temporaryIndex->storeEntries([$newEntry]); + } else { + throw new Exception('File '.$localFilePath.' 
does not contain any ontology related instances'); + } } } } else { diff --git a/scripts/src/IndexEntry.php b/scripts/src/IndexEntry.php index 86cc842..53da5a4 100644 --- a/scripts/src/IndexEntry.php +++ b/scripts/src/IndexEntry.php @@ -58,7 +58,7 @@ public function getOntologyTitle(): string|null */ public function setOntologyTitle(string|null $ontologyTitle): self { - if (isUrl($ontologyTitle) || isEmpty($ontologyTitle)) { + if (false === isEmpty($ontologyTitle)) { $this->ontologyTitle = trim((string) $ontologyTitle); return $this; @@ -77,7 +77,7 @@ public function getOntologyIri(): string|null */ public function setOntologyIri(string|null $ontologyIri): self { - if (isUrl($ontologyIri) || isEmpty($ontologyIri)) { + if (false === isEmpty($ontologyIri)) { $this->ontologyIri = trim((string) $ontologyIri); return $this;