Skip to content

Commit 0d815b8

Browse files
committed
PDFBOX-5660: don't open twice, as suggested by Valery Bokov; closes #388
git-svn-id: https://svn.apache.org/repos/asf/pdfbox/trunk@1931274 13f79535-47bb-0310-9956-ffa450edef68
1 parent e99bf02 commit 0d815b8

File tree

1 file changed

+20
-17
lines changed

1 file changed

+20
-17
lines changed

tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java

Lines changed: 20 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
import java.io.File;
2121
import java.io.FileInputStream;
2222
import java.io.IOException;
23-
import java.io.InputStream;
23+
import java.io.BufferedInputStream;
2424
import java.io.InputStreamReader;
2525
import java.io.PrintStream;
2626
import java.io.Reader;
@@ -185,27 +185,30 @@ public Integer call()
185185
setTopMargin(margins[2]);
186186
setBottomMargin(margins[3]);
187187

188-
boolean hasUtf8BOM = false;
189-
if (charset.equals(StandardCharsets.UTF_8))
188+
try (BufferedInputStream is = new BufferedInputStream(new FileInputStream(infile)))
190189
{
191-
// check for utf8 BOM
192-
// FileInputStream doesn't support mark/reset
193-
try (InputStream is = new FileInputStream(infile))
190+
if (charset.equals(StandardCharsets.UTF_8))
194191
{
195-
if (is.read() == 0xEF && is.read() == 0xBB && is.read() == 0xBF)
192+
final int readLimit = 3;
193+
is.mark(readLimit);
194+
195+
byte[] firstBytes = new byte[readLimit];
196+
if (is.read(firstBytes) != readLimit)
196197
{
197-
hasUtf8BOM = true;
198+
throw new IOException("Could not read 3 bytes, size changed?!");
198199
}
199-
}
200-
}
201-
try (InputStream is = new FileInputStream(infile))
202-
{
203-
if (hasUtf8BOM)
204-
{
205-
long skipped = is.skip(3);
206-
if (skipped != 3)
200+
201+
if (firstBytes[0] == (byte) 0xEF &&
202+
firstBytes[1] == (byte) 0xBB &&
203+
firstBytes[2] == (byte) 0xBF)
204+
{
205+
//UTF-8 with BOM
206+
//3 bytes already read (skipped)
207+
}
208+
else
207209
{
208-
throw new IOException("Could not skip 3 bytes, size changed?!");
210+
//It looks like UTF with no BOM or file was corrupted
211+
is.reset();
209212
}
210213
}
211214
try (Reader reader = new InputStreamReader(is, charset))

0 commit comments

Comments
 (0)