From 56c8222cf692c166b12a966e1eaa8063f3185ea0 Mon Sep 17 00:00:00 2001
From: Aman Gupta
Date: Mon, 15 Dec 2025 23:30:43 +0800
Subject: [PATCH 1/3] llama-server: friendlier error msg when ctx < input

This PR adds formatted strings to the server's send_error function
---
 tools/server/server-context.cpp | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 90898b5ec43..ce29162bc6d 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1283,6 +1283,11 @@ struct server_context_impl {
         queue_results.send(std::move(res));
     }
 
+    template <typename T, typename... Args>
+    void send_error(const T & obj, const enum error_type type, const char * fmt, Args... args) {
+        send_error(obj, string_format(fmt, args...), type);
+    }
+
     // if multimodal is enabled, send an error and return false
     bool check_no_mtmd(const int id_task) {
         if (mctx) {
@@ -1962,19 +1967,28 @@ struct server_context_impl {
 
                 if (!slot.can_split()) {
                     if (slot.task->n_tokens() > n_ubatch) {
-                        send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
+                        send_error(slot, ERROR_TYPE_SERVER,
+                                   "input (%d tokens) is too large to process. increase the physical batch "
+                                   "size (current batch size: %d)",
+                                   slot.task->n_tokens(), n_ubatch);
                         slot.release();
                         continue;
                     }
 
                     if (slot.task->n_tokens() > slot.n_ctx) {
-                        send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
+                        send_error(
+                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
+                            "input (%d tokens) is larger than the max context size (%d tokens). skipping",
+                            slot.task->n_tokens(), slot.n_ctx);
                         slot.release();
                         continue;
                     }
                 } else {
                     if (slot.task->n_tokens() >= slot.n_ctx) {
-                        send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
+                        send_error(
+                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
+                            "request (%d tokens) exceeds available context size (%d tokens), try increasing it",
+                            slot.task->n_tokens(), slot.n_ctx);
                         slot.release();
                         continue;
                     }

From c6417bd958421a97703b8bb6db0bcd009cefec5c Mon Sep 17 00:00:00 2001
From: Aman Gupta
Date: Fri, 19 Dec 2025 00:40:11 +0800
Subject: [PATCH 2/3] llama-server: use string_format inline

---
 tools/server/server-context.cpp | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index ce29162bc6d..a5ce67cea0e 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1283,11 +1283,6 @@ struct server_context_impl {
         queue_results.send(std::move(res));
     }
 
-    template <typename T, typename... Args>
-    void send_error(const T & obj, const enum error_type type, const char * fmt, Args... args) {
-        send_error(obj, string_format(fmt, args...), type);
-    }
-
     // if multimodal is enabled, send an error and return false
     bool check_no_mtmd(const int id_task) {
         if (mctx) {
@@ -1967,28 +1962,33 @@ struct server_context_impl {
 
                 if (!slot.can_split()) {
                     if (slot.task->n_tokens() > n_ubatch) {
-                        send_error(slot, ERROR_TYPE_SERVER,
-                                   "input (%d tokens) is too large to process. increase the physical batch "
-                                   "size (current batch size: %d)",
-                                   slot.task->n_tokens(), n_ubatch);
+                        send_error(slot,
+                                   string_format(
+                                       "input (%d tokens) is too large to process. increase the physical batch "
+                                       "size (current batch size: %d)",
+                                       slot.task->n_tokens(), n_ubatch),
+                                   ERROR_TYPE_SERVER);
                         slot.release();
                         continue;
                     }
 
                     if (slot.task->n_tokens() > slot.n_ctx) {
-                        send_error(
-                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
-                            "input (%d tokens) is larger than the max context size (%d tokens). skipping",
-                            slot.task->n_tokens(), slot.n_ctx);
+                        send_error(
+                            slot,
+                            string_format(
+                                "input (%d tokens) is larger than the max context size (%d tokens). skipping",
+                                slot.task->n_tokens(), slot.n_ctx),
+                            ERROR_TYPE_EXCEED_CONTEXT_SIZE);
                         slot.release();
                         continue;
                     }
                 } else {
                     if (slot.task->n_tokens() >= slot.n_ctx) {
-                        send_error(
-                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
-                            "request (%d tokens) exceeds available context size (%d tokens), try increasing it",
-                            slot.task->n_tokens(), slot.n_ctx);
+                        send_error(slot,
+                                   string_format("request (%d tokens) exceeds available context size (%d "
+                                                 "tokens), try increasing it",
+                                                 slot.task->n_tokens(), slot.n_ctx),
+                                   ERROR_TYPE_EXCEED_CONTEXT_SIZE);
                         slot.release();
                         continue;
                     }

From 0141889351b9c778b7992d192bee1b1997e9bfa4 Mon Sep 17 00:00:00 2001
From: Aman Gupta
Date: Fri, 19 Dec 2025 11:41:49 +0800
Subject: [PATCH 3/3] fix test

---
 tools/server/server-context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index a5ce67cea0e..7ff23bcb5be 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1985,7 +1985,7 @@ struct server_context_impl {
                 } else {
                     if (slot.task->n_tokens() >= slot.n_ctx) {
                         send_error(slot,
-                                   string_format("request (%d tokens) exceeds available context size (%d "
+                                   string_format("request (%d tokens) exceeds the available context size (%d "
                                                  "tokens), try increasing it",
                                                  slot.task->n_tokens(), slot.n_ctx),
                                    ERROR_TYPE_EXCEED_CONTEXT_SIZE);