From 26e506347e800e6bc25b9065c88a6000252a7f37 Mon Sep 17 00:00:00 2001 From: Kirill Yankov Date: Thu, 16 Nov 2023 16:47:23 +0100 Subject: [PATCH] Refactorings, trying to load context from request host when localhost --- .../scala/org/dbpedia/databus/ApiImpl.scala | 38 ++++--- .../databus/CachingJsonldContext.scala | 4 +- .../org/dbpedia/databus/SparqlClient.scala | 102 +++++++++++------- .../dbpedia/databus/DatabusScalatraTest.scala | 31 +++++- .../org/dbpedia/databus/ValidationTest.scala | 38 +++++-- 5 files changed, 144 insertions(+), 69 deletions(-) diff --git a/src/main/scala/org/dbpedia/databus/ApiImpl.scala b/src/main/scala/org/dbpedia/databus/ApiImpl.scala index 9001ad8..c8546a8 100644 --- a/src/main/scala/org/dbpedia/databus/ApiImpl.scala +++ b/src/main/scala/org/dbpedia/databus/ApiImpl.scala @@ -3,14 +3,13 @@ package org.dbpedia.databus import java.io.FileNotFoundException import java.net.URL import java.nio.file.{NoSuchFileException, Path, Paths} - import javax.servlet.ServletContext import javax.servlet.http.HttpServletRequest import org.apache.jena.rdf.model.Model import org.apache.jena.riot.Lang import org.apache.jena.shared.JenaException import org.dbpedia.databus.ApiImpl.Config -import org.dbpedia.databus.RdfConversions.{contextUri, generateGraphId, graphToBytes, mapContentType, readModel} +import org.dbpedia.databus.RdfConversions.{contextUrl, generateGraphId, graphToBytes, jenaJsonLdContextWithFallbackForLocalhost, mapContentType, readModel} import org.dbpedia.databus.swagger.api.DatabusApi import org.dbpedia.databus.swagger.model.{OperationFailure, OperationSuccess} import org.eclipse.jgit.errors.{MissingObjectException, RepositoryNotFoundException} @@ -32,7 +31,12 @@ class ApiImpl(config: Config) extends DatabusApi { override def dataidSubgraph(body: String)(request: HttpServletRequest): Try[String] = - readModel(body.getBytes, defaultLang, contextUri(body.getBytes, defaultLang)) + readModel( + body.getBytes, + defaultLang, + contextUrl(body.getBytes, defaultLang) + .map(jenaJsonLdContextWithFallbackForLocalhost(_, request.getRemoteHost).get) + ) .flatMap(m => Tractate.extract(m.getGraph, TractateV1.Version)) .map(_.stringForSigning) @@ -64,11 +68,12 @@ class ApiImpl(config: Config) extends DatabusApi { .map(_.toLowerCase) .getOrElse("") val lang = mapContentType(ct, defaultLang) - val ctxUri = contextUri(body.getBytes, lang) - readModel(body.getBytes, lang, ctxUri) + val ctxU = contextUrl(body.getBytes, lang) + val ctx = ctxU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, request.getRemoteHost).get) + readModel(body.getBytes, lang, ctx) .flatMap(model => { saveToVirtuoso(model, graphId)({ - graphToBytes(model.getGraph, defaultLang, ctxUri) + graphToBytes(model.getGraph, defaultLang, ctxU) .flatMap(a => saveFiles(username, Map( pa -> a )).map(hash => OperationSuccess(graphId, hash))) @@ -79,9 +84,17 @@ class ApiImpl(config: Config) extends DatabusApi { override def shaclValidate(dataid: String, shacl: String)(request: HttpServletRequest): Try[String] = { val lang = getLangFromAcceptHeader(request) setResponseHeaders(Map("Content-Type" -> lang.getContentType.toHeaderString))(request) + val ctxU = contextUrl(dataid.getBytes, lang) + val ctx = ctxU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, request.getRemoteHost).get) + + val shaclU = contextUrl(shacl.getBytes, RdfConversions.DefaultShaclLang) + val shaclCtx = shaclU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, request.getRemoteHost).get) + RdfConversions.validateWithShacl( dataid.getBytes, shacl.getBytes, + ctx, + shaclCtx, defaultLang ).flatMap(r => RdfConversions.graphToBytes(r.getGraph, lang, None)) .map(new String(_)) @@ -121,8 +134,13 @@ class ApiImpl(config: Config) extends DatabusApi { setResponseHeaders(Map("Content-Type" -> lang.getContentType.toHeaderString))(request) client.readFile(username, p) .flatMap(body => { - val ctxUri = contextUri(body, defaultLang) - readModel(body, defaultLang, ctxUri) + val ctxUri = contextUrl(body, defaultLang) + readModel( + body, + defaultLang, + contextUrl(body, defaultLang) + .map(jenaJsonLdContextWithFallbackForLocalhost(_, request.getRemoteHost).get) + ) .flatMap(m => graphToBytes(m.getGraph, lang, ctxUri) ) @@ -144,10 +162,6 @@ class ApiImpl(config: Config) extends DatabusApi { } } - private[databus] def saveToVirtuoso[T](data: Array[Byte], lang: Lang, graphId: String)(execInTransaction: => Try[T]): Try[T] = - readModel(data, lang, contextUri(data, lang)) - .flatMap(saveToVirtuoso(_, graphId)(execInTransaction)) - private[databus] def saveToVirtuoso[T](model: Model, graphId: String)(execInTransaction: => Try[T]): Try[T] = { val rqsts = model.getGraph.find().asScala .grouped(1000) diff --git a/src/main/scala/org/dbpedia/databus/CachingJsonldContext.scala b/src/main/scala/org/dbpedia/databus/CachingJsonldContext.scala index 7981001..aa0242d 100644 --- a/src/main/scala/org/dbpedia/databus/CachingJsonldContext.scala +++ b/src/main/scala/org/dbpedia/databus/CachingJsonldContext.scala @@ -23,8 +23,8 @@ class CachingJsonldContext(sizeLimit: Int, opts: JsonLdOptions) extends Context( }) case _ => super.parse(ctx) } - - + def putInCache(contextUri: String, ctx: Context) = + cache.put(contextUri, ctx) } diff --git a/src/main/scala/org/dbpedia/databus/SparqlClient.scala b/src/main/scala/org/dbpedia/databus/SparqlClient.scala index 5aefe21..1619be9 100644 --- a/src/main/scala/org/dbpedia/databus/SparqlClient.scala +++ b/src/main/scala/org/dbpedia/databus/SparqlClient.scala @@ -2,8 +2,8 @@ package org.dbpedia.databus import java.io.{ByteArrayInputStream, ByteArrayOutputStream} -import java.net.URL - +import java.net.{InetAddress, URL} +import com.github.jsonldjava.core import com.github.jsonldjava.core.{JsonLdConsts, JsonLdOptions} import com.github.jsonldjava.utils.JsonUtils import com.mchange.v2.c3p0.ComboPooledDataSource @@ -157,9 +157,9 @@ object RdfConversions { private lazy val CachingContext = initCachingContext() - private val DefaultShaclLang = Lang.TTL + val DefaultShaclLang = Lang.TTL - def readModel(data: Array[Byte], lang: Lang, context: Option[String]): Try[Model] = Try { + def readModel(data: Array[Byte], lang: Lang, context: Option[util.Context]): Try[Model] = Try { val model = ModelFactory.createDefaultModel() val dataStream = new ByteArrayInputStream(data) val dest = StreamRDFLib.graph(model.getGraph) @@ -169,24 +169,21 @@ object RdfConversions { .lang(lang) context.foreach(cs => - parser.context( - jenaContext(CachingContext.parse(cs)) - ) - ) + parser.context(cs)) parser.parse(dest) model } - def graphToBytes(model: Graph, outputLang: Lang, context: Option[String]): Try[Array[Byte]] = Try { + def graphToBytes(model: Graph, outputLang: Lang, context: Option[URL]): Try[Array[Byte]] = Try { val str = new ByteArrayOutputStream() val builder = RDFWriter.create.format(langToFormat(outputLang)) .source(model) context.foreach(ctx => { - val jctx = jenaContext(CachingContext.parse(ctx)) + val jctx = jenaContext(CachingContext.parse(ctx.toString)) builder.context(jctx) - builder.set(JsonLDWriter.JSONLD_CONTEXT_SUBSTITUTION, new JsonString(ctx)) + builder.set(JsonLDWriter.JSONLD_CONTEXT_SUBSTITUTION, new JsonString(ctx.toString)) }) builder @@ -201,19 +198,17 @@ object RdfConversions { .validate(Shapes.parse(shacl), model.getGraph) ) - def validateWithShacl(file: Array[Byte], shaclData: Array[Byte], modelLang: Lang): Try[ValidationReport] = + def validateWithShacl(file: Array[Byte], shaclData: Array[Byte], fileCtx: Option[util.Context], shaclCtx: Option[util.Context], modelLang: Lang): Try[ValidationReport] = for { - shaclGra <- readModel(shaclData, DefaultShaclLang, contextUri(shaclData, DefaultShaclLang)) - ctxUri = contextUri(file, modelLang) - model <- readModel(file, modelLang, ctxUri) + shaclGra <- readModel(shaclData, DefaultShaclLang, shaclCtx) + model <- readModel(file, modelLang, fileCtx) re <- validateWithShacl(model, shaclGra.getGraph) } yield re - def validateWithShacl(file: Array[Byte], shaclUri: String, modelLang: Lang): Try[ValidationReport] = + def validateWithShacl(file: Array[Byte], fileCtx: Option[util.Context], shaclUri: String, modelLang: Lang): Try[ValidationReport] = for { shaclGra <- Try(RDFDataMgr.loadGraph(shaclUri)) - ctxUri = contextUri(file, modelLang) - model <- readModel(file, modelLang, ctxUri) + model <- readModel(file, modelLang, fileCtx) re <- validateWithShacl(model, shaclGra) } yield re @@ -299,26 +294,59 @@ object RdfConversions { bld.append(">") } - // TODO implement extraction of context as an object and then setting it directly - def contextUri(data: Array[Byte], lang: Lang): Option[String] = - if (lang.getName == Lang.JSONLD.getName) jsonLdContextUriString(new String(data)) else None + def contextUrl(data: Array[Byte], lang: Lang): Option[URL] = + if (lang == Lang.JSONLD) { + jsonLdContextUrl(data) + .get + } else { + None + } + + def jenaJsonLdContextWithFallbackForLocalhost(jsonLdContextUrl: URL, requestHost: String): Try[util.Context] = + jsonLdContextWithFallbackForLocalhost(jsonLdContextUrl, requestHost) + .map(jenaContext) - private def jsonLdContextUriString(data: String): Option[String] = { - val jsonObject = JsonUtils.fromString(new String(data)) + private def jsonLdContextUrl(data: Array[Byte]): Try[Option[URL]] = Try( - jsonObject - .asInstanceOf[java.util.Map[String, Object]] - .get(JsonLdConsts.CONTEXT) - .toString + JsonUtils.fromString(new String(data)) ) - .toOption - .flatMap(ctx => Try(new URL(ctx)) match { - case Failure(_) => None - case Success(uri) => Some(uri.toString()) - }) + .map(j => + Try(j.asInstanceOf[java.util.Map[String, Object]]).toOption) + .map(_.flatMap(c => + Option(c.get(JsonLdConsts.CONTEXT)) + .map(_.toString) + .flatMap(ctx => + Try(new URL(ctx)) match { + case Failure(_) => None + case Success(uri) => Some(uri) + }))) + + private def jsonLdContextWithFallbackForLocalhost(jsonLdContextUrl: URL, requestHost: String): Try[core.Context] = + Try(CachingContext.parse(jsonLdContextUrl.toString)) + .recoverWith { + case e => + if (InetAddress.getByName(jsonLdContextUrl.getHost).isLoopbackAddress) { + preloadLocalhostContextFromRequestHost(jsonLdContextUrl.toString, requestHost) + } else { + Failure(e) + } + } + + private def jenaContext(jsonLdCtx: core.Context) = { + val context: util.Context = RIOT.getContext.copy() + jsonLdCtx.putAll(jsonLdCtx.getPrefixes(true)) + context.put(JsonLDWriter.JSONLD_CONTEXT, jsonLdCtx) + context.put(JsonLDReader.JSONLD_CONTEXT, jsonLdCtx) + context } - import com.github.jsonldjava.core.Context + private def preloadLocalhostContextFromRequestHost(localhostCtxUri: String, requestHost: String): Try[core.Context] = Try { + val ctxUrl = new URL(localhostCtxUri) + val addressWithIp = localhostCtxUri.replace(ctxUrl.getHost, requestHost) + val ctx = CachingContext.parse(addressWithIp) + CachingContext.putInCache(localhostCtxUri, ctx) + ctx + } private def initCachingContext() = { val opts = new JsonLdOptions(null) @@ -326,14 +354,6 @@ object RdfConversions { new CachingJsonldContext(30, opts) } - private def jenaContext(jsonLdCtx: Context) = { - val context: util.Context = RIOT.getContext.copy() - jsonLdCtx.putAll(jsonLdCtx.getPrefixes(true)) - context.put(JsonLDWriter.JSONLD_CONTEXT, jsonLdCtx) - context.put(JsonLDReader.JSONLD_CONTEXT, jsonLdCtx) - context - } - private def escapeString(s: String) = { val sb = new StringBuilder(s.length()) val slen = s.length() diff --git a/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala b/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala index 1e1530a..6f596d1 100644 --- a/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala +++ b/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala @@ -3,19 +3,30 @@ package org.dbpedia.databus import java.io.ByteArrayInputStream import java.nio.file.{Files, Paths} - import org.apache.jena.rdf.model.ModelFactory import org.apache.jena.riot.{Lang, RDFDataMgr} import org.dbpedia.databus.ApiImpl.Config import org.dbpedia.databus.swagger.DatabusSwagger import org.dbpedia.databus.swagger.api.DefaultApi +import org.scalatest.BeforeAndAfter import org.scalatra.test.scalatest.ScalatraFlatSpec import sttp.model.Uri -class DatabusScalatraTest extends ScalatraFlatSpec { +import scala.reflect.io.{Directory, Path} + +class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter { override def port = 55388 + val dir = Files.createDirectories(Paths.get("target", "test_dir-git")) + + before { + Files.createDirectories(Paths.get("target", "test_dir-git")) + } + after { + Directory(Path.jfile2path(dir.toFile)).deleteRecursively() + } + val config = Config( Uri.parse(s"http://localhost:${port}/virtu/oso").right.get, "u", @@ -24,7 +35,7 @@ class DatabusScalatraTest extends ScalatraFlatSpec { "org.dbpedia.databus.HttpVirtClient", Some("sdcsdc"), "/g", - None, + Some(dir.toAbsolutePath), Some("u"), Some("p"), Some("http"), @@ -39,20 +50,30 @@ class DatabusScalatraTest extends ScalatraFlatSpec { addServlet(new DefaultApi(), "/databus/*") addServlet(new ExternalApiEmul, "/*") + "File save" should "work" in { + val file = "group.jsonld" val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile)) post("/databus/graph/save?repo=kuckuck&path=pa/fl.jsonld", bytes) { status should equal(200) } + + get("/databus/graph/read?repo=kuckuck&path=pa/fl.jsonld") { + status should equal(200) + val respCtx = RdfConversions.contextUrl(bodyBytes, Lang.JSONLD) + respCtx should equal(RdfConversions.contextUrl(bytes, Lang.JSONLD)) + respCtx.get.toString.nonEmpty should equal(true) + } + } - "File read" should "return 500" in { + "File read" should "return 404" in { get("/databus/graph/read?repo=kuckuck&path=pa/not_existing.jsonld") { - status should equal(500) + status should equal(404) } } diff --git a/src/test/scala/org/dbpedia/databus/ValidationTest.scala b/src/test/scala/org/dbpedia/databus/ValidationTest.scala index 0e0eacf..6f00abf 100644 --- a/src/test/scala/org/dbpedia/databus/ValidationTest.scala +++ b/src/test/scala/org/dbpedia/databus/ValidationTest.scala @@ -1,14 +1,11 @@ package org.dbpedia.databus import java.nio.file.{Files, Paths} - import org.apache.jena.query.ARQ -import org.apache.jena.rdf.model.Model import org.apache.jena.riot.Lang +import org.dbpedia.databus.RdfConversions.{contextUrl, jenaJsonLdContextWithFallbackForLocalhost} import org.scalatest.{FlatSpec, Matchers} -import scala.util.{Failure, Success} - class ValidationTest extends FlatSpec with Matchers { ARQ.init() @@ -18,7 +15,11 @@ class ValidationTest extends FlatSpec with Matchers { val shacl = "https://raw.githubusercontent.com/dbpedia/databus-git-mockup/main/dev/dataid-shacl.ttl" val file = "version.jsonld" val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile)) - val re = RdfConversions.validateWithShacl(bytes, shacl, lang) + + val ctxU = contextUrl(bytes, lang) + val ctx = ctxU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, "random").get) + + val re = RdfConversions.validateWithShacl(bytes, ctx, shacl, lang) re.get.conforms() should be(true) } @@ -26,7 +27,11 @@ class ValidationTest extends FlatSpec with Matchers { val shacl = "https://raw.githubusercontent.com/dbpedia/databus-git-mockup/main/dev/dataid-shacl.ttl" val file = "version_wrong.jsonld" val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile)) - val re = RdfConversions.validateWithShacl(bytes, shacl, lang) + + val ctxU = contextUrl(bytes, lang) + val ctx = ctxU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, "random").get) + + val re = RdfConversions.validateWithShacl(bytes, ctx, shacl, lang) re.get.conforms() should be(false) } @@ -34,7 +39,11 @@ class ValidationTest extends FlatSpec with Matchers { val shacl = "https://raw.githubusercontent.com/dbpedia/databus-git-mockup/main/dev/dataid-shacl.ttl" val file = "group.jsonld" val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile)) - val re = RdfConversions.validateWithShacl(bytes, shacl, lang) + + val ctxU = contextUrl(bytes, lang) + val ctx = ctxU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, "random").get) + + val re = RdfConversions.validateWithShacl(bytes, ctx, shacl, lang) re.get.conforms() should be(true) } @@ -42,7 +51,11 @@ class ValidationTest extends FlatSpec with Matchers { val shacl = "https://raw.githubusercontent.com/dbpedia/databus-git-mockup/main/dev/dataid-shacl.ttl" val file = "version.jsonld" val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile)) - val re = RdfConversions.validateWithShacl(bytes, shacl, lang) + + val ctxU = contextUrl(bytes, lang) + val ctx = ctxU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, "random").get) + + val re = RdfConversions.validateWithShacl(bytes, ctx, shacl, lang) re.get.conforms() should be(true) } @@ -51,7 +64,14 @@ class ValidationTest extends FlatSpec with Matchers { val shacl = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(shaclFn).getFile)) val file = "version.jsonld" val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile)) - val re = RdfConversions.validateWithShacl(bytes, shacl, lang) + + val ctxU = contextUrl(bytes, lang) + val ctx = ctxU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, "random").get) + + val shaclU = contextUrl(shacl, RdfConversions.DefaultShaclLang) + val shaclCtx = shaclU.map(cu => jenaJsonLdContextWithFallbackForLocalhost(cu, "random").get) + + val re = RdfConversions.validateWithShacl(bytes, shacl, ctx, shaclCtx, lang) re.get.conforms() should be(true) }