Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 20 additions & 17 deletions tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
Expand Down Expand Up @@ -185,27 +185,30 @@ public Integer call()
setTopMargin(margins[2]);
setBottomMargin(margins[3]);

boolean hasUtf8BOM = false;
if (charset.equals(StandardCharsets.UTF_8))
try (BufferedInputStream is = new BufferedInputStream(new FileInputStream(infile)))
{
// check for utf8 BOM
// FileInputStream doesn't support mark/reset
try (InputStream is = new FileInputStream(infile))
if (charset.equals(StandardCharsets.UTF_8))
{
if (is.read() == 0xEF && is.read() == 0xBB && is.read() == 0xBF)
final int readLimit = 3;
is.mark(readLimit);

byte[] firstBytes = new byte[readLimit];
if (is.read(firstBytes) != readLimit)
{
hasUtf8BOM = true;
throw new IOException("Could not read 3 bytes, size changed?!");
}
}
}
try (InputStream is = new FileInputStream(infile))
{
if (hasUtf8BOM)
{
long skipped = is.skip(3);
if (skipped != 3)

if (firstBytes[0] == (byte) 0xEF &&
firstBytes[1] == (byte) 0xBB &&
firstBytes[2] == (byte) 0xBF)
{
//UTF-8 with BOM
//3 bytes already read (skipped)
}
else
{
throw new IOException("Could not skip 3 bytes, size changed?!");
//It looks like UTF with no BOM or file was corrupted
is.reset();
}
}
try (Reader reader = new InputStreamReader(is, charset))
Expand Down