Skip to content

Commit cc0a043

Browse files
authored
server: friendlier error msg when ctx < input (#18174)
* llama-server: friendlier error msg when ctx < input

  This PR adds formatted strings to the server's send_error function.

* llama-server: use string_format inline

* fix test
1 parent 98c1c7a commit cc0a043

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

tools/server/server-context.cpp

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1974,19 +1974,33 @@ struct server_context_impl {
19741974

19751975
if (!slot.can_split()) {
19761976
if (slot.task->n_tokens() > n_ubatch) {
1977-
send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER);
1977+
send_error(slot,
1978+
string_format(
1979+
"input (%d tokens) is too large to process. increase the physical batch "
1980+
"size (current batch size: %d)",
1981+
slot.task->n_tokens(), n_ubatch),
1982+
ERROR_TYPE_SERVER);
19781983
slot.release();
19791984
continue;
19801985
}
19811986

19821987
if (slot.task->n_tokens() > slot.n_ctx) {
1983-
send_error(slot, "input is larger than the max context size. skipping", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
1988+
send_error(
1989+
slot,
1990+
string_format(
1991+
"input (%d tokens) is larger than the max context size (%d tokens). skipping",
1992+
slot.task->n_tokens(), slot.n_ctx),
1993+
ERROR_TYPE_EXCEED_CONTEXT_SIZE);
19841994
slot.release();
19851995
continue;
19861996
}
19871997
} else {
19881998
if (slot.task->n_tokens() >= slot.n_ctx) {
1989-
send_error(slot, "the request exceeds the available context size, try increasing it", ERROR_TYPE_EXCEED_CONTEXT_SIZE);
1999+
send_error(slot,
2000+
string_format("request (%d tokens) exceeds the available context size (%d "
2001+
"tokens), try increasing it",
2002+
slot.task->n_tokens(), slot.n_ctx),
2003+
ERROR_TYPE_EXCEED_CONTEXT_SIZE);
19902004
slot.release();
19912005
continue;
19922006
}

0 commit comments

Comments
 (0)