From 56c8222cf692c166b12a966e1eaa8063f3185ea0 Mon Sep 17 00:00:00 2001
From: Aman Gupta
Date: Mon, 15 Dec 2025 23:30:43 +0800
Subject: [PATCH 1/3] llama-server: friendlier error msg when ctx < input

This PR adds formatted strings to the server's send_error function
---
 tools/server/server-context.cpp | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 90898b5ec43..ce29162bc6d 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1283,6 +1283,11 @@ struct server_context_impl {
         queue_results.send(std::move(res));
     }
 
+    template <typename T, typename... Args>
+    void send_error(const T & obj, const enum error_type type, const char * fmt, Args... args) {
+        send_error(obj, string_format(fmt, args...), type);
+    }
+
     // if multimodal is enabled, send an error and return false
     bool check_no_mtmd(const int id_task) {
         if (mctx) {
@@ -1962,19 +1967,28 @@ struct server_context_impl {
 
                 if (!slot.can_split()) {
                     if (slot.task->n_tokens() > n_ubatch) {
-                        send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
+                        send_error(slot, ERROR_TYPE_SERVER,
+                                   "input (%d tokens) is too large to process. increase the physical batch "
+                                   "size (current batch size: %d)",
+                                   slot.task->n_tokens(), n_ubatch);
                         slot.release();
                         continue;
                     }
 
                     if (slot.task->n_tokens() > slot.n_ctx) {
-                        send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
+                        send_error(
+                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
+                            "input (%d tokens) is larger than the max context size (%d tokens). skipping",
+                            slot.task->n_tokens(), slot.n_ctx);
                         slot.release();
                         continue;
                     }
                 } else {
                     if (slot.task->n_tokens() >= slot.n_ctx) {
-                        send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
+                        send_error(
+                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
+                            "request (%d tokens) exceeds available context size (%d tokens), try increasing it",
+                            slot.task->n_tokens(), slot.n_ctx);
                         slot.release();
                         continue;
                     }

From c6417bd958421a97703b8bb6db0bcd009cefec5c Mon Sep 17 00:00:00 2001
From: Aman Gupta
Date: Fri, 19 Dec 2025 00:40:11 +0800
Subject: [PATCH 2/3] llama-server: use string_format inline

---
 tools/server/server-context.cpp | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index ce29162bc6d..a5ce67cea0e 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1283,11 +1283,6 @@ struct server_context_impl {
         queue_results.send(std::move(res));
     }
 
-    template <typename T, typename... Args>
-    void send_error(const T & obj, const enum error_type type, const char * fmt, Args... args) {
-        send_error(obj, string_format(fmt, args...), type);
-    }
-
     // if multimodal is enabled, send an error and return false
     bool check_no_mtmd(const int id_task) {
         if (mctx) {
@@ -1967,28 +1962,33 @@ struct server_context_impl {
 
                 if (!slot.can_split()) {
                     if (slot.task->n_tokens() > n_ubatch) {
-                        send_error(slot, ERROR_TYPE_SERVER,
-                                   "input (%d tokens) is too large to process. increase the physical batch "
-                                   "size (current batch size: %d)",
-                                   slot.task->n_tokens(), n_ubatch);
+                        send_error(slot,
+                                   string_format(
+                                       "input (%d tokens) is too large to process. increase the physical batch "
+                                       "size (current batch size: %d)",
+                                       slot.task->n_tokens(), n_ubatch),
+                                   ERROR_TYPE_SERVER);
                         slot.release();
                         continue;
                     }
 
                     if (slot.task->n_tokens() > slot.n_ctx) {
-                        send_error(
-                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
-                            "input (%d tokens) is larger than the max context size (%d tokens). skipping",
-                            slot.task->n_tokens(), slot.n_ctx);
+                        send_error(
+                            slot,
+                            string_format(
+                                "input (%d tokens) is larger than the max context size (%d tokens). skipping",
+                                slot.task->n_tokens(), slot.n_ctx),
+                            ERROR_TYPE_EXCEED_CONTEXT_SIZE);
                         slot.release();
                         continue;
                     }
                 } else {
                     if (slot.task->n_tokens() >= slot.n_ctx) {
-                        send_error(
-                            slot, ERROR_TYPE_EXCEED_CONTEXT_SIZE,
-                            "request (%d tokens) exceeds available context size (%d tokens), try increasing it",
-                            slot.task->n_tokens(), slot.n_ctx);
+                        send_error(slot,
+                                   string_format("request (%d tokens) exceeds available context size (%d "
+                                                 "tokens), try increasing it",
+                                                 slot.task->n_tokens(), slot.n_ctx),
+                                   ERROR_TYPE_EXCEED_CONTEXT_SIZE);
                         slot.release();
                         continue;
                     }

From 0141889351b9c778b7992d192bee1b1997e9bfa4 Mon Sep 17 00:00:00 2001
From: Aman Gupta
Date: Fri, 19 Dec 2025 11:41:49 +0800
Subject: [PATCH 3/3] fix test

---
 tools/server/server-context.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index a5ce67cea0e..7ff23bcb5be 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -1985,7 +1985,7 @@ struct server_context_impl {
                 } else {
                     if (slot.task->n_tokens() >= slot.n_ctx) {
                         send_error(slot,
-                                   string_format("request (%d tokens) exceeds available context size (%d "
+                                   string_format("request (%d tokens) exceeds the available context size (%d "
                                                  "tokens), try increasing it",
                                                  slot.task->n_tokens(), slot.n_ctx),
                                    ERROR_TYPE_EXCEED_CONTEXT_SIZE);