Skip to content

Commit e7bf9ec

Browse files
committed
PDFBOX-6145: revert the last change because it breaks Tika on pages that have no contents but do have annotations
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1931313 13f79535-47bb-0310-9956-ffa450edef68
1 parent 8d52d50 commit e7bf9ec

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,10 @@ protected void processPages(PDPageTree pages) throws IOException
298298

299299
for (PDPage page : pages)
300300
{
301-
processPage(page);
301+
if (page.hasContents())
302+
{
303+
processPage(page);
304+
}
302305
currentPageNo++;
303306
}
304307
}
@@ -340,10 +343,6 @@ public void processPage(PDPage page) throws IOException
340343
&& (startBookmarkPageNumber == -1 || currentPageNo >= startBookmarkPageNumber)
341344
&& (endBookmarkPageNumber == -1 || currentPageNo <= endBookmarkPageNumber))
342345
{
343-
if (!page.hasContents())
344-
{
345-
return;
346-
}
347346
startPage(page);
348347

349348
int numberOfArticleSections = 1;

0 commit comments

Comments
 (0)