diff --git a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
index 525dd084883..80622a94e86 100644
--- a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
+++ b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
@@ -20,7 +20,7 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
-import java.io.InputStream;
+import java.io.BufferedInputStream;
 import java.io.InputStreamReader;
 import java.io.PrintStream;
 import java.io.Reader;
@@ -185,27 +185,38 @@ public Integer call()
             setTopMargin(margins[2]);
             setBottomMargin(margins[3]);
 
-            boolean hasUtf8BOM = false;
-            if (charset.equals(StandardCharsets.UTF_8))
-            {
-                // check for utf8 BOM
-                // FileInputStream doesn't support mark/reset
-                try (InputStream is = new FileInputStream(infile))
-                {
-                    if (is.read() == 0xEF && is.read() == 0xBB && is.read() == 0xBF)
-                    {
-                        hasUtf8BOM = true;
-                    }
-                }
-            }
-            try (InputStream is = new FileInputStream(infile))
-            {
-                if (hasUtf8BOM)
-                {
-                    long skipped = is.skip(3);
-                    if (skipped != 3)
-                    {
-                        throw new IOException("Could not skip 3 bytes, size changed?!");
+            try (BufferedInputStream is = new BufferedInputStream(new FileInputStream(infile)))
+            {
+                if (charset.equals(StandardCharsets.UTF_8))
+                {
+                    // Peek at the first three bytes; consume them only if they are
+                    // the UTF-8 BOM (EF BB BF), which must not reach the reader.
+                    final int bomLength = 3;
+                    is.mark(bomLength);
+
+                    // read() may return fewer bytes than requested, so loop until
+                    // the BOM-sized prefix is complete or end of file is reached.
+                    byte[] firstBytes = new byte[bomLength];
+                    int bytesRead = 0;
+                    while (bytesRead < bomLength)
+                    {
+                        int n = is.read(firstBytes, bytesRead, bomLength - bytesRead);
+                        if (n < 0)
+                        {
+                            break;
+                        }
+                        bytesRead += n;
+                    }
+
+                    boolean hasUtf8BOM = bytesRead == bomLength &&
+                            firstBytes[0] == (byte) 0xEF &&
+                            firstBytes[1] == (byte) 0xBB &&
+                            firstBytes[2] == (byte) 0xBF;
+                    if (!hasUtf8BOM)
+                    {
+                        // No BOM, or the file is shorter than three bytes (e.g. empty):
+                        // rewind so the reader sees the content from the very start.
+                        is.reset();
                     }
                 }
                 try (Reader reader = new InputStreamReader(is, charset))