diff --git a/samples/bugs/Issue331.pdf b/samples/bugs/Issue331.pdf new file mode 100644 index 00000000..17c8765b Binary files /dev/null and b/samples/bugs/Issue331.pdf differ diff --git a/src/Smalot/PdfParser/Pages.php b/src/Smalot/PdfParser/Pages.php index 47d90c5d..12116b58 100644 --- a/src/Smalot/PdfParser/Pages.php +++ b/src/Smalot/PdfParser/Pages.php @@ -38,6 +38,9 @@ class Pages extends PDFObject /** * @param bool $deep * + * @todo Objects other than Pages or Page might need to be treated specifically in order to get Page objects out of them, + * see https://github.com/smalot/pdfparser/issues/331 + * * @return array */ public function getPages($deep = false) @@ -56,7 +59,7 @@ public function getPages($deep = false) foreach ($kids as $kid) { if ($kid instanceof self) { $pages = array_merge($pages, $kid->getPages(true)); - } else { + } elseif ($kid instanceof Page) { $pages[] = $kid; } } diff --git a/tests/Integration/PageTest.php b/tests/Integration/PageTest.php index 603f5924..93525954 100644 --- a/tests/Integration/PageTest.php +++ b/tests/Integration/PageTest.php @@ -447,6 +447,32 @@ public function testGetDataTmIssue336() $this->assertEquals('Lorem', $item[1]); } + /** + * Tests that getPages() only returns Page objects + * + * @see https://github.com/smalot/pdfparser/issues/331 + * + * Sample pdf file provided by @Reqrefusion, see + * https://github.com/smalot/pdfparser/pull/350#issuecomment-703195220 + */ + public function testGetPages() + { + $filename = $this->rootDir.'/samples/bugs/Issue331.pdf'; + $document = $this->getParserInstance()->parseFile($filename); + $pages = $document->getPages(); + + // This should actually be 3 pages, but as long as the cause for issue #331 + // has not been found and the issue is not fixed, we'll settle for 2 here. + // We still test for the count, so in case the bug should be fixed + // unknowingly, we don't forget to resolve the issue as well and make sure + // this assertion is present. + $this->assertCount(2, $pages); + + foreach ($pages as $page) { + $this->assertTrue($page instanceof Page); + } + } + public function testGetTextXY() { // Document with text.