1 file changed: +6 −9 lines changed
@@ -127,9 +127,7 @@ def __init__(
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
         self.eval_tokens: Deque[llama_cpp.llama_token] = deque(maxlen=n_ctx)
-        self.eval_logits: Deque[List[float]] = deque(
-            maxlen=n_ctx if logits_all else 1
-        )
+        self.eval_logits: Deque[List[float]] = deque(maxlen=n_ctx if logits_all else 1)
 
         self.cache: Optional[LlamaCache] = None
 
@@ -547,12 +545,6 @@ def _create_completion(
                 finish_reason = "stop"
                 break
 
-            if self.cache and len(completion_tokens) == 0:
-                if prompt_tokens not in self.cache:
-                    if self.verbose:
-                        print("Llama._create_completion: cache miss", file=sys.stderr)
-                    self.cache[prompt_tokens] = self.save_state()
-
             completion_tokens.append(token)
 
             all_text = self.detokenize(completion_tokens)
@@ -611,6 +603,11 @@ def _create_completion(
                 finish_reason = "length"
                 break
 
+        if self.cache:
+            if self.verbose:
+                print("Llama._create_completion: cache save", file=sys.stderr)
+            self.cache[prompt_tokens + completion_tokens] = self.save_state()
+
         if stream:
             yield {
                 "id": completion_id,