From ffc6b82b787e7506566690e27b8c86f056863bc4 Mon Sep 17 00:00:00 2001
From: "valery.bokov" <vlry.bkv@gmail.com>
Date: Thu, 1 Jan 2026 16:38:44 +0100
Subject: [PATCH 1/2] refactor TextToPDF.call method

---
 .../org/apache/pdfbox/tools/TextToPDF.java    | 26 ++++++++-----------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
index 525dd084883..0662f55a983 100644
--- a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
+++ b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
@@ -185,25 +185,21 @@ public Integer call()
             setTopMargin(margins[2]);
             setBottomMargin(margins[3]);
 
-            boolean hasUtf8BOM = false;
-            if (charset.equals(StandardCharsets.UTF_8))
+            try (InputStream is = new FileInputStream(infile))
             {
-                // check for utf8 BOM
-                // FileInputStream doesn't support mark/reset
-                try (InputStream is = new FileInputStream(infile))
+                if (charset.equals(StandardCharsets.UTF_8))
                 {
-                    if (is.read() == 0xEF && is.read() == 0xBB && is.read() == 0xBF)
+                    try
                     {
-                        hasUtf8BOM = true;
+                        // check for utf8 BOM
+                        // FileInputStream doesn't support mark/reset
+                        int b1 = is.read();
+                        int b2 = is.read();
+                        int b3 = is.read();
+                        //todo Here we can perform a check for file format corruption here.
+                        boolean hasUtf8BOM = b1 == 0xEF && b2 == 0xBB && b3 == 0xBF;
                     }
-                }
-            }
-            try (InputStream is = new FileInputStream(infile))
-            {
-                if (hasUtf8BOM)
-                {
-                    long skipped = is.skip(3);
-                    if (skipped != 3)
+                    catch (IOException x)
                     {
                         throw new IOException("Could not skip 3 bytes, size changed?!");
                     }

From a6356cf0b72b37f71eeab59d1178e9518bd58512 Mon Sep 17 00:00:00 2001
From: "valery.bokov" <vlry.bkv@gmail.com>
Date: Mon, 12 Jan 2026 18:31:17 +0100
Subject: [PATCH 2/2] support UTF-8 with and without BOM in TextToPDF.call

---
 .../org/apache/pdfbox/tools/TextToPDF.java    | 31 ++++++++++++-------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
index 0662f55a983..80622a94e86 100644
--- a/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
+++ b/tools/src/main/java/org/apache/pdfbox/tools/TextToPDF.java
@@ -20,7 +20,7 @@
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
-import java.io.InputStream;
+import java.io.BufferedInputStream;
 import java.io.InputStreamReader;
 import java.io.PrintStream;
 import java.io.Reader;
@@ -185,23 +185,30 @@ public Integer call()
             setTopMargin(margins[2]);
             setBottomMargin(margins[3]);
 
-            try (InputStream is = new FileInputStream(infile))
+            try (BufferedInputStream is = new BufferedInputStream(new FileInputStream(infile)))
             {
                 if (charset.equals(StandardCharsets.UTF_8))
                 {
-                    try
+                    final int readLimit = 3;
+                    is.mark(readLimit);
+
+                    byte[] firstBytes = new byte[readLimit];
+                    if (is.read(firstBytes) != readLimit)
                     {
-                        // check for utf8 BOM
-                        // FileInputStream doesn't support mark/reset
-                        int b1 = is.read();
-                        int b2 = is.read();
-                        int b3 = is.read();
-                        //todo Here we can perform a check for file format corruption here.
-                        boolean hasUtf8BOM = b1 == 0xEF && b2 == 0xBB && b3 == 0xBF;
+                        throw new IOException("Could not read 3 bytes, size changed?!");
                     }
-                    catch (IOException x)
+
+                    if (firstBytes[0] == (byte) 0xEF &&
+                        firstBytes[1] == (byte) 0xBB &&
+                        firstBytes[2] == (byte) 0xBF)
+                    {
+                        //UTF-8 BOM detected: nothing to do here, because
+                        //its 3 bytes are already consumed and decoding starts after them
+                    }
+                    else
                     {
-                        throw new IOException("Could not skip 3 bytes, size changed?!");
+                        //No BOM (plain UTF-8 or corrupted file): rewind to keep these bytes
+                        is.reset();
                     }
                 }
                 try (Reader reader = new InputStreamReader(is, charset))