diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index 90898b5ec43..7ff23bcb5be 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -1962,19 +1962,33 @@ struct server_context_impl { if (!slot.can_split()) { if (slot.task->n_tokens() > n_ubatch) { - send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER); + send_error(slot, + string_format( + "input (%d tokens) is too large to process. increase the physical batch " + "size (current batch size: %d)", + slot.task->n_tokens(), n_ubatch), + ERROR_TYPE_SERVER); slot.release(); continue; } if (slot.task->n_tokens() > slot.n_ctx) { - send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE); + send_error( + slot, + string_format( + "input (%d tokens) is larger than the max context size (%d tokens). skipping", + slot.task->n_tokens(), slot.n_ctx), + ERROR_TYPE_EXCEED_CONTEXT_SIZE); slot.release(); continue; } } else { if (slot.task->n_tokens() >= slot.n_ctx) { - send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE); + send_error(slot, + string_format("request (%d tokens) exceeds the available context size (%d " + "tokens), try increasing it", + slot.task->n_tokens(), slot.n_ctx), + ERROR_TYPE_EXCEED_CONTEXT_SIZE); slot.release(); continue; }