From c42fc11de122bbdb26fbb90ad1ad273836817230 Mon Sep 17 00:00:00 2001 From: Brian Huisman Date: Tue, 11 Jul 2023 10:34:11 -0400 Subject: [PATCH] Support metadata element names containing spaces (#612) * Update Element.php Add ability for PdfParser to parse metadata names with hexadecimal encoded characters such as "Document#20Type" where \#20 is a space. Resolves Issue #529 * Update ElementTest.php Add test for spaces in metadata property names. * Make sure we fully support hex Too quick on the commit! Make sure our two 'digit' regexp also finds A-F hex digits. Add a test for #2d which is a hyphen. * fixed coding style issue in Element.php --------- Co-authored-by: Konrad Abicht --- src/Smalot/PdfParser/Element.php | 10 ++++++++-- tests/PHPUnit/Integration/ElementTest.php | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Smalot/PdfParser/Element.php b/src/Smalot/PdfParser/Element.php index df61c583..0ce6c428 100644 --- a/src/Smalot/PdfParser/Element.php +++ b/src/Smalot/PdfParser/Element.php @@ -107,10 +107,16 @@ public static function parse(string $content, Document $document = null, int &$p $old_position = $position; if (!$only_values) { - if (!preg_match('/\G\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', $content, $match, 0, $position)) { + if (!preg_match('/\G\s*(?P<name>\/[A-Z#0-9\._]+)(?P<value>.*)/si', $content, $match, 0, $position)) { break; } else { - $name = ltrim($match['name'], '/'); + $name = preg_replace_callback( + '/#([0-9a-f]{2})/i', + function ($m) { + return \chr(base_convert($m[1], 16, 10)); + }, + ltrim($match['name'], '/') + ); $value = $match['value']; $position = strpos($content, $value, $position + \strlen($match['name'])); } diff --git a/tests/PHPUnit/Integration/ElementTest.php b/tests/PHPUnit/Integration/ElementTest.php index 2b40b7e7..f3f3ceed 100644 --- a/tests/PHPUnit/Integration/ElementTest.php +++ b/tests/PHPUnit/Integration/ElementTest.php @@ -57,7 +57,8 @@ public function testParse(): void $content = '/NameType /FlateDecode 
/Contents[4 0 R 42]/Fonts<>/NullType null/StringType(hello)/DateType(D:20130901235555+02\'00\')/XRefType 2 0 R - /NumericType 8/HexaType<0020>/BooleanType false'; + /NumericType 8/HexaType<0020>/BooleanType false + /Space#20Test(Templates)/Hyphen#2DTest(Templates)'; $offset = 0; $elements = Element::parse($content, $document, $offset, false); @@ -100,6 +101,10 @@ public function testParse(): void $this->assertTrue($elements['BooleanType'] instanceof ElementBoolean); $this->assertFalse($elements['BooleanType']->getContent()); + $this->assertTrue(\array_key_exists('Space Test', $elements)); + + $this->assertTrue(\array_key_exists('Hyphen-Test', $elements)); + // Only_values = true. $content = '/NameType /FlateDecode'; $offset = 0;