|
20 | 20 | import java.io.File; |
21 | 21 | import java.io.FileInputStream; |
22 | 22 | import java.io.IOException; |
23 | | -import java.io.InputStream; |
| 23 | +import java.io.BufferedInputStream; |
24 | 24 | import java.io.InputStreamReader; |
25 | 25 | import java.io.PrintStream; |
26 | 26 | import java.io.Reader; |
@@ -185,27 +185,30 @@ public Integer call() |
185 | 185 | setTopMargin(margins[2]); |
186 | 186 | setBottomMargin(margins[3]); |
187 | 187 |
|
188 | | - boolean hasUtf8BOM = false; |
189 | | - if (charset.equals(StandardCharsets.UTF_8)) |
| 188 | + try (BufferedInputStream is = new BufferedInputStream(new FileInputStream(infile))) |
190 | 189 | { |
191 | | - // check for utf8 BOM |
192 | | - // FileInputStream doesn't support mark/reset |
193 | | - try (InputStream is = new FileInputStream(infile)) |
| 190 | + if (charset.equals(StandardCharsets.UTF_8)) |
194 | 191 | { |
195 | | - if (is.read() == 0xEF && is.read() == 0xBB && is.read() == 0xBF) |
| 192 | + final int readLimit = 3; |
| 193 | + is.mark(readLimit); |
| 194 | + |
| 195 | + byte[] firstBytes = new byte[readLimit]; |
| 196 | + if (is.read(firstBytes) != readLimit) |
196 | 197 | { |
197 | | - hasUtf8BOM = true; |
| 198 | + throw new IOException("Could not read 3 bytes, size changed?!"); |
198 | 199 | } |
199 | | - } |
200 | | - } |
201 | | - try (InputStream is = new FileInputStream(infile)) |
202 | | - { |
203 | | - if (hasUtf8BOM) |
204 | | - { |
205 | | - long skipped = is.skip(3); |
206 | | - if (skipped != 3) |
| 200 | + |
| 201 | + if (firstBytes[0] == (byte) 0xEF && |
| 202 | + firstBytes[1] == (byte) 0xBB && |
| 203 | + firstBytes[2] == (byte) 0xBF) |
| 204 | + { |
| 205 | + //UTF-8 with BOM |
| 206 | + //3 bytes already read (skipped) |
| 207 | + } |
| 208 | + else |
207 | 209 | { |
208 | | - throw new IOException("Could not skip 3 bytes, size changed?!"); |
| 210 | + //It looks like UTF with no BOM or file was corrupted |
| 211 | + is.reset(); |
209 | 212 | } |
210 | 213 | } |
211 | 214 | try (Reader reader = new InputStreamReader(is, charset)) |
|
0 commit comments