From 4069430eb3ba458ab1d440e8785b43a3eae9476d Mon Sep 17 00:00:00 2001 From: David Sommer Date: Mon, 11 Oct 2021 13:16:11 +0200 Subject: [PATCH] Correctly parse inline xmp extension schema --- .../org/apache/xmpbox/xml/DomXmpParser.java | 65 +++++++++++---- .../apache/xmpbox/parser/InlineXmpTest.java | 41 ++++++++++ .../resources/validxmp/inline-definition.xml | 80 +++++++++++++++++++ 3 files changed, 171 insertions(+), 15 deletions(-) create mode 100644 xmpbox/src/test/java/org/apache/xmpbox/parser/InlineXmpTest.java create mode 100644 xmpbox/src/test/resources/validxmp/inline-definition.xml diff --git a/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java b/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java index 2ac48a735e7..cd6c1ae40bc 100644 --- a/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java +++ b/xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java @@ -31,6 +31,7 @@ import java.util.Map; import java.util.Deque; import java.util.StringTokenizer; +import java.util.stream.Collectors; import javax.xml.XMLConstants; import javax.xml.namespace.QName; @@ -179,24 +180,14 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException // Now, parse the content of root Element rdfRdf = findDescriptionsParent(root); List descriptions = DomHelper.getElementChildren(rdfRdf); - List dataDescriptions = new ArrayList<>(descriptions.size()); - for (Element description : descriptions) + for (final Element description : descriptions) { - Element first = DomHelper.getFirstChildElement(description); - if (first != null && "pdfaExtension".equals(first.getPrefix())) - { - PdfaExtensionHelper.validateNaming(xmp, description); - parseDescriptionRoot(xmp, description); - } - else - { - dataDescriptions.add(description); - } + parseSchemaExtensions(xmp, description); } // find schema description PdfaExtensionHelper.populateSchemaMapping(xmp); // parse data description - for (Element description : dataDescriptions) + for (Element description : descriptions) { parseDescriptionRoot(xmp, description); } @@ -204,6 +195,46 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException return xmp; } + private boolean isSchemaExtensionProperty(final Element element) + { + return element != null && "pdfaExtension".equals(element.getPrefix()); + } + + private void parseSchemaExtensions(final XMPMetadata xmp, final Element description) throws XmpParsingException + { + final TypeMapping tm = xmp.getTypeMapping(); + nsFinder.push(description); + try + { + final List schemaExtensions = DomHelper.getElementChildren(description) + .stream() + .filter(this::isSchemaExtensionProperty) + .collect(Collectors.toList()); + for (final Element schemaExtension : schemaExtensions) + { + final String namespace = schemaExtension.getNamespaceURI(); + if (!tm.isDefinedSchema(schemaExtension.getNamespaceURI())) + { + throw new XmpParsingException(ErrorType.NoSchema, + "This namespace is not a schema or a structured type : " + namespace); + } + PropertyType type = checkPropertyDefinition(xmp, DomHelper.getQName(schemaExtension)); + final XMPSchema schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, schemaExtension.getPrefix()); + loadAttributes(schema, description); + ComplexPropertyContainer container = schema.getContainer(); + createProperty(xmp, schemaExtension, type, container); + } + } + catch (XmpSchemaException e) + { + throw new XmpParsingException(ErrorType.Undefined, "Parsing failed", e); + } + finally + { + nsFinder.pop(); + } + } + private void parseDescriptionRoot(XMPMetadata xmp, Element description) throws XmpParsingException { nsFinder.push(description); @@ -308,6 +339,10 @@ private void parseChildrenAsProperties(XMPMetadata xmp, List properties throw new XmpParsingException(ErrorType.NoSchema, "This namespace is not a schema or a structured type : " + namespace); } + if (isSchemaExtensionProperty(property)) + { + continue; + } XMPSchema schema = xmp.getSchema(namespace); if (schema == null) { @@ -837,8 +872,8 @@ private void removeComments(Node root) // There is only one node so we do not remove it return; } - - for (int i = 0; i < nl.getLength(); i++) + + for (int i = 0; i < nl.getLength(); i++) { Node node = nl.item(i); if (node instanceof Comment) diff --git a/xmpbox/src/test/java/org/apache/xmpbox/parser/InlineXmpTest.java b/xmpbox/src/test/java/org/apache/xmpbox/parser/InlineXmpTest.java new file mode 100644 index 00000000000..cf64a13722d --- /dev/null +++ b/xmpbox/src/test/java/org/apache/xmpbox/parser/InlineXmpTest.java @@ -0,0 +1,41 @@ +package org.apache.xmpbox.parser; + +import org.apache.xmpbox.XMPMetadata; +import org.apache.xmpbox.schema.PDFAIdentificationSchema; +import org.apache.xmpbox.type.BadFieldValueException; +import org.apache.xmpbox.xml.DomXmpParser; +import org.apache.xmpbox.xml.XmpParsingException; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; + +import static org.junit.jupiter.api.Assertions.*; + +public class InlineXmpTest +{ + + private static final String EXAMPLE = "src/test/resources/validxmp/inline-definition.xml"; + @Test + public void testCanParseValidSchema() throws IOException, XmpParsingException, BadFieldValueException + { + byte[] xmpData = Files.readAllBytes(Paths.get(EXAMPLE)); + final DomXmpParser xmpParser = new DomXmpParser(); + final XMPMetadata metadata = xmpParser.parse(xmpData); + checkForPDFAIdentifiers(metadata); + } + + private void checkForPDFAIdentifiers(final XMPMetadata xmp) throws BadFieldValueException + { + assertNotNull(xmp, "XMPSchema nicht vorhanden"); + final PDFAIdentificationSchema pdfaIdSchema = xmp.getPDFAIdentificationSchema(); + assertNotNull(pdfaIdSchema, "PDFAIdentificationSchema nicht vorhanden"); + final int partValue = pdfaIdSchema.getPart(); + assertTrue(partValue == 1 || partValue == 2, + "Das PDF-Dokument entspricht nicht dem geforderten Standard"); + final String dataValue = xmp.getSchema("http://ns.example.org/default/1.0/").getUnqualifiedTextPropertyValue("Data"); + assertEquals("Example", dataValue, "Falscher Wert in Data-Field"); + } + +} diff --git a/xmpbox/src/test/resources/validxmp/inline-definition.xml b/xmpbox/src/test/resources/validxmp/inline-definition.xml new file mode 100644 index 00000000000..5540548b51b --- /dev/null +++ b/xmpbox/src/test/resources/validxmp/inline-definition.xml @@ -0,0 +1,80 @@ + + + + + 2021-05-21T11:42:49+01:00 + 2021-05-21T11:47:16+02:00 + 2021-05-21T11:47:16+02:00 + application/pdf + + + Inline XMP Extension PoC + + + + + DSO + + + + + Inline XMP Extension PoC + + + + 2 + A + Example + + + + Simple Schema + http://ns.example.org/default/1.0/ + example + + + + Data + Text + internal + Example Data + + + + + + http://www.aiim.org/pdfa/ns/id/ + pdfaid + PDF/A ID Schema + + + + internal + Part of PDF/A standard + part + Integer + + + internal + Conformance level of PDF/A standard + conformance + Text + + + + + + + + + + + \ No newline at end of file