Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly parse inline xmp extension schema #132

Open
wants to merge 1 commit into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 50 additions & 15 deletions xmpbox/src/main/java/org/apache/xmpbox/xml/DomXmpParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.Map;
import java.util.Deque;
import java.util.StringTokenizer;
import java.util.stream.Collectors;

import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
Expand Down Expand Up @@ -179,31 +180,61 @@ public XMPMetadata parse(InputStream input) throws XmpParsingException
// Now, parse the content of root
Element rdfRdf = findDescriptionsParent(root);
List<Element> descriptions = DomHelper.getElementChildren(rdfRdf);
List<Element> dataDescriptions = new ArrayList<>(descriptions.size());
for (Element description : descriptions)
for (final Element description : descriptions)
{
Element first = DomHelper.getFirstChildElement(description);
if (first != null && "pdfaExtension".equals(first.getPrefix()))
{
PdfaExtensionHelper.validateNaming(xmp, description);
parseDescriptionRoot(xmp, description);
}
else
{
dataDescriptions.add(description);
}
parseSchemaExtensions(xmp, description);
}
// find schema description
PdfaExtensionHelper.populateSchemaMapping(xmp);
// parse data description
for (Element description : dataDescriptions)
for (Element description : descriptions)
{
parseDescriptionRoot(xmp, description);
}

return xmp;
}

/**
 * Tells whether an element is written with the {@code pdfaExtension} prefix.
 * <p>
 * The decision is made on the literal prefix string only; the namespace URI
 * bound to that prefix is not inspected here.
 *
 * @param element the DOM element to inspect, may be {@code null}
 * @return {@code true} when {@code element} is non-null and its prefix is
 *         exactly {@code "pdfaExtension"}, {@code false} otherwise
 */
private boolean isSchemaExtensionProperty(final Element element)
{
    if (element == null)
    {
        return false;
    }
    final String prefix = element.getPrefix();
    return "pdfaExtension".equals(prefix);
}

/**
 * Parses the {@code pdfaExtension}-prefixed children of one description
 * element and stores them as properties on the metadata.
 * <p>
 * Children of {@code description} that are not schema extension properties
 * (see {@link #isSchemaExtensionProperty(Element)}) are ignored by this method.
 *
 * @param xmp the metadata object that receives the parsed extension properties
 * @param description the description element whose children are scanned
 * @throws XmpParsingException if the namespace of an extension element is not
 *         a defined schema, or if a schema-related failure occurs while the
 *         schema is obtained or the property is created
 */
private void parseSchemaExtensions(final XMPMetadata xmp, final Element description) throws XmpParsingException
{
    final TypeMapping tm = xmp.getTypeMapping();
    // Namespace declarations of this description must be visible while its children are parsed.
    nsFinder.push(description);
    try
    {
        for (final Element child : DomHelper.getElementChildren(description))
        {
            if (!isSchemaExtensionProperty(child))
            {
                continue;
            }
            final String namespace = child.getNamespaceURI();
            if (!tm.isDefinedSchema(namespace))
            {
                throw new XmpParsingException(ErrorType.NoSchema,
                        "This namespace is not a schema or a structured type : " + namespace);
            }
            final PropertyType type = checkPropertyDefinition(xmp, DomHelper.getQName(child));
            // Reuse a schema already present for this namespace, the same way
            // parseChildrenAsProperties does, instead of unconditionally creating
            // another one for every extension element.
            XMPSchema schema = xmp.getSchema(namespace);
            if (schema == null)
            {
                schema = tm.getSchemaFactory(namespace).createXMPSchema(xmp, child.getPrefix());
                loadAttributes(schema, description);
            }
            final ComplexPropertyContainer container = schema.getContainer();
            createProperty(xmp, child, type, container);
        }
    }
    catch (XmpSchemaException e)
    {
        throw new XmpParsingException(ErrorType.Undefined, "Parsing failed", e);
    }
    finally
    {
        // Always undo the push above, even when parsing fails.
        nsFinder.pop();
    }
}

private void parseDescriptionRoot(XMPMetadata xmp, Element description) throws XmpParsingException
{
nsFinder.push(description);
Expand Down Expand Up @@ -308,6 +339,10 @@ private void parseChildrenAsProperties(XMPMetadata xmp, List<Element> properties
throw new XmpParsingException(ErrorType.NoSchema,
"This namespace is not a schema or a structured type : " + namespace);
}
if (isSchemaExtensionProperty(property))
{
continue;
}
XMPSchema schema = xmp.getSchema(namespace);
if (schema == null)
{
Expand Down Expand Up @@ -837,8 +872,8 @@ private void removeComments(Node root)
// There is only one node so we do not remove it
return;
}
for (int i = 0; i < nl.getLength(); i++)

for (int i = 0; i < nl.getLength(); i++)
{
Node node = nl.item(i);
if (node instanceof Comment)
Expand Down
41 changes: 41 additions & 0 deletions xmpbox/src/test/java/org/apache/xmpbox/parser/InlineXmpTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package org.apache.xmpbox.parser;

import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.DomXmpParser;
import org.apache.xmpbox.xml.XmpParsingException;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Verifies that an XMP packet can be parsed when the PDF/A extension schema
 * definition ({@code pdfaExtension:schemas}) sits in the same
 * {@code rdf:Description} as the data properties it describes.
 */
public class InlineXmpTest
{

    /** Fixture path, relative to the working directory the test is run from. */
    private static final String EXAMPLE = "src/test/resources/validxmp/inline-definition.xml";

    /** Namespace of the custom schema declared inline by the fixture. */
    private static final String EXAMPLE_NAMESPACE = "http://ns.example.org/default/1.0/";

    /**
     * Parses the fixture and checks both the PDF/A identification values and
     * the property of the inline-declared example schema.
     *
     * @throws IOException if the fixture file cannot be read
     * @throws XmpParsingException if the parser rejects the fixture
     * @throws BadFieldValueException if a checked property has an unexpected type
     */
    @Test
    public void testCanParseValidSchema() throws IOException, XmpParsingException, BadFieldValueException
    {
        byte[] xmpData = Files.readAllBytes(Paths.get(EXAMPLE));
        final DomXmpParser xmpParser = new DomXmpParser();
        final XMPMetadata metadata = xmpParser.parse(xmpData);
        checkForPDFAIdentifiers(metadata);
    }

    /**
     * Asserts that the parsed metadata carries a PDF/A identification schema
     * with part 1 or 2 and that the example schema's {@code Data} property
     * has the value {@code "Example"}.
     *
     * @param xmp the parsed metadata to check
     * @throws BadFieldValueException if a checked property has an unexpected type
     */
    private void checkForPDFAIdentifiers(final XMPMetadata xmp) throws BadFieldValueException
    {
        assertNotNull(xmp, "XMPSchema nicht vorhanden");
        final PDFAIdentificationSchema pdfaIdSchema = xmp.getPDFAIdentificationSchema();
        assertNotNull(pdfaIdSchema, "PDFAIdentificationSchema nicht vorhanden");
        final int partValue = pdfaIdSchema.getPart();
        assertTrue(partValue == 1 || partValue == 2,
                "Das PDF-Dokument entspricht nicht dem geforderten Standard");
        // Fail with a readable assertion message instead of a NullPointerException
        // when the inline-declared schema was not registered by the parser.
        assertNotNull(xmp.getSchema(EXAMPLE_NAMESPACE),
                "No schema found for namespace " + EXAMPLE_NAMESPACE);
        final String dataValue = xmp.getSchema(EXAMPLE_NAMESPACE).getUnqualifiedTextPropertyValue("Data");
        assertEquals("Example", dataValue, "Falscher Wert in Data-Field");
    }

}
80 changes: 80 additions & 0 deletions xmpbox/src/test/resources/validxmp/inline-definition.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
<?xpacket begin="" id="W5M0MpCehiHzreSzNTczkc9d"?>
<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.6-c015 84.159810, 2016/09/10-02:41:30 ">
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
<rdf:Description rdf:about=""
xmlns:xmp="http://ns.adobe.com/xap/1.0/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:pdf="http://ns.adobe.com/pdf/1.3/"
xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/"
xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/"
xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#"
xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#"
xmlns:example="http://ns.example.org/default/1.0/">
<xmp:CreateDate>2021-05-21T11:42:49+01:00</xmp:CreateDate>
<xmp:ModifyDate>2021-05-21T11:47:16+02:00</xmp:ModifyDate>
<xmp:MetadataDate>2021-05-21T11:47:16+02:00</xmp:MetadataDate>
<dc:format>application/pdf</dc:format>
<dc:title>
<rdf:Alt>
<rdf:li xml:lang="x-default">Inline XMP Extension PoC</rdf:li>
</rdf:Alt>
</dc:title>
<dc:creator>
<rdf:Seq>
<rdf:li>DSO</rdf:li>
</rdf:Seq>
</dc:creator>
<dc:description>
<rdf:Alt>
<rdf:li xml:lang="x-default">Inline XMP Extension PoC</rdf:li>
</rdf:Alt>
</dc:description>
<pdf:Keywords/>
<pdfaid:part>2</pdfaid:part>
<pdfaid:conformance>A</pdfaid:conformance>
<example:Data>Example</example:Data>
<pdfaExtension:schemas>
<rdf:Bag>
<rdf:li rdf:parseType="Resource">
<pdfaSchema:schema>Simple Schema</pdfaSchema:schema>
<pdfaSchema:namespaceURI>http://ns.example.org/default/1.0/</pdfaSchema:namespaceURI>
<pdfaSchema:prefix>example</pdfaSchema:prefix>
<pdfaSchema:property>
<rdf:Seq>
<rdf:li rdf:parseType="Resource">
<pdfaProperty:name>Data</pdfaProperty:name>
<pdfaProperty:valueType>Text</pdfaProperty:valueType>
<pdfaProperty:category>internal</pdfaProperty:category>
<pdfaProperty:description>Example Data</pdfaProperty:description>
</rdf:li>
</rdf:Seq>
</pdfaSchema:property>
</rdf:li>
<rdf:li rdf:parseType="Resource">
<pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>
<pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>
<pdfaSchema:schema>PDF/A ID Schema</pdfaSchema:schema>
<pdfaSchema:property>
<rdf:Seq>
<rdf:li rdf:parseType="Resource">
<pdfaProperty:category>internal</pdfaProperty:category>
<pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>
<pdfaProperty:name>part</pdfaProperty:name>
<pdfaProperty:valueType>Integer</pdfaProperty:valueType>
</rdf:li>
<rdf:li rdf:parseType="Resource">
<pdfaProperty:category>internal</pdfaProperty:category>
<pdfaProperty:description>Conformance level of PDF/A standard</pdfaProperty:description>
<pdfaProperty:name>conformance</pdfaProperty:name>
<pdfaProperty:valueType>Text</pdfaProperty:valueType>
</rdf:li>
</rdf:Seq>
</pdfaSchema:property>
</rdf:li>
</rdf:Bag>
</pdfaExtension:schemas>
</rdf:Description>
</rdf:RDF>
</x:xmpmeta>

<?xpacket end="w"?>