Skip to content

Commit e95991c

Browse files
committed
add support for chat sessions
Today, to chat with a model, a user has to run one command per completion. Add --session, which enables a user to have an interactive chat session with the inference model. --session can be passed with or without --message. If no --message is passed, the user is prompted to give the first message ``` llama-stack-client inference chat-completion --session >>> hi whats up! Assistant> Not much! How's your day going so far? Is there something I can help you with or would you like to chat? >>> what color is the sky? Assistant> The color of the sky can vary depending on the time of day and atmospheric conditions. Here are some common colors you might see: * During the daytime, when the sun is overhead, the sky typically appears blue. * At sunrise and sunset, the sky can take on hues of red, orange, pink, and purple due to the scattering of light by atmospheric particles. * On a clear day with no clouds, the sky can appear a bright blue, often referred to as "cerulean." * In areas with high levels of pollution or dust, the sky can appear more hazy or grayish. * At night, the sky can be dark and black, although some stars and moonlight can make it visible. So, what's your favorite color of the sky? >>> ``` Signed-off-by: Charlie Doern <cdoern@redhat.com>
1 parent 04bfdbe commit e95991c

File tree

1 file changed

+46
-13
lines changed

1 file changed

+46
-13
lines changed

src/llama_stack_client/lib/cli/inference/inference.py

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# This source code is licensed under the terms described in the LICENSE file in
55
# the root directory of this source tree.
66

7-
from typing import Optional
7+
from typing import Optional, List, Dict
88

99
import click
1010
from rich.console import Console
@@ -19,30 +19,63 @@ def inference():
1919

2020

2121
@click.command("chat-completion")
@click.option("--message", help="Message")
@click.option("--stream", is_flag=True, help="Streaming", default=False)
@click.option("--session", is_flag=True, help="Start a Chat Session", default=False)
@click.option("--model-id", required=False, help="Model ID")
@click.pass_context
@handle_client_errors("inference chat-completion")
def chat_completion(ctx, message: Optional[str], stream: bool, session: bool, model_id: Optional[str]):
    """Show available inference chat completion endpoints on distribution endpoint.

    Runs a single chat completion for --message, and/or starts an
    interactive chat session when --session is given. At least one of
    --message or --session is required.
    """
    if not message and not session:
        click.secho(
            "--message is required if not starting a chat session",
            fg="red",
        )
        raise click.exceptions.Exit(1)
    client = ctx.obj["client"]
    console = Console()

    if not model_id:
        available_models = [model.identifier for model in client.models.list() if model.model_type == "llm"]
        # Fail with a clear message instead of an IndexError when the
        # distribution serves no LLM models.
        if not available_models:
            click.secho("No LLM models available on the distribution endpoint", fg="red")
            raise click.exceptions.Exit(1)
        model_id = available_models[0]

    messages: List[Dict[str, str]] = []
    if message:
        messages.append({"role": "user", "content": message})
        # Only issue a completion when there is an actual user message.
        # With --session alone, the first prompt is collected inside
        # chat_session(); sending an empty message list here would be
        # an invalid request.
        response = client.inference.chat_completion(
            model_id=model_id,
            messages=messages,
            stream=stream,
        )
        if not stream:
            console.print(response)
        else:
            for event in EventLogger().log(response):
                event.print()
    if session:
        # Carry any initial exchange into the interactive session so the
        # model keeps the conversation context.
        chat_session(client=client, model_id=model_id, messages=messages, console=console)
58+
59+
60+
def chat_session(client, model_id: Optional[str], messages: List[Dict[str, str]], console: Console):
    """Run an interactive chat session with the served model.

    Repeatedly prompts the user for input, appends each user turn to
    ``messages``, and streams the model's reply. Exits cleanly on
    Ctrl+C or Ctrl+D, and stops on any client error.
    """
    while True:
        try:
            message = input(">>> ")
            messages.append({"role": "user", "content": message})
            # NOTE(review): assistant replies are never appended to
            # ``messages``, so the model only sees user turns as history —
            # confirm whether accumulating assistant turns is intended.
            response = client.inference.chat_completion(
                model_id=model_id,
                messages=messages,
                stream=True,
            )
            for event in EventLogger().log(response):
                event.print()
        except (KeyboardInterrupt, EOFError):
            # Ctrl+C raises KeyboardInterrupt; Ctrl+D at the prompt raises
            # EOFError. Both are deliberate exits, not errors — EOFError
            # previously fell into the generic handler below and was
            # misreported as an error.
            console.print("\nDetected user interrupt, exiting")
            break
        except Exception as exc:
            console.print(f"Error in chat session {exc}")
            break
4679

4780

4881
# Register subcommands

0 commit comments

Comments
 (0)