
Commit b5fcd04: connect to local ollama
1 parent 6f2db27

4 files changed (+79, -50 lines)

.env

Lines changed: 2 additions & 2 deletions
@@ -2,14 +2,14 @@ PYTHONDONTWRITEBYTECODE=1
 PYTHONUNBUFFERED=1

 # Postgres
-POSTGRES_HOST=db
+POSTGRES_HOST=localhost
 POSTGRES_PORT=5432
 POSTGRES_DB=devdb
 POSTGRES_USER=devdb
 POSTGRES_PASSWORD=secret

 # Redis
-REDIS_HOST=inmemory
+REDIS_HOST=localhost
 REDIS_PORT=6379
 REDIS_DB=2

app/services/llm.py

Lines changed: 30 additions & 24 deletions
@@ -10,36 +10,42 @@ def __init__(self, base_url: str = "http://localhost:11434/v1"):

     async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
         """Stream chat completion responses from LLM."""
-        # Send initial user message
-        yield orjson.dumps({"role": "user", "content": prompt}) + b"\n"
+        # Send user message first
+        user_msg = {
+            "role": "user",
+            "content": prompt,
+        }
+        yield orjson.dumps(user_msg) + b"\n"

+        # Open client as context manager and stream responses
         async with httpx.AsyncClient(base_url=self.base_url) as client:
-            request_data = {
-                "model": self.model,
-                "messages": [{"role": "user", "content": prompt}],
-                "stream": True,
-            }
-
             async with client.stream(
-                "POST", "/chat/completions", json=request_data, timeout=60.0
+                "POST",
+                "/chat/completions",
+                json={
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "stream": True,
+                },
+                timeout=60.0,
             ) as response:
                 async for line in response.aiter_lines():
-                    if not (line.startswith("data: ") and line != "data: [DONE]"):
-                        continue
-                    try:
-                        data = orjson.loads(line[6:])  # Skip "data: " prefix
-                        if (
-                            content := data.get("choices", [{}])[0]
-                            .get("delta", {})
-                            .get("content", "")
-                        ):
-                            yield (
-                                orjson.dumps({"role": "model", "content": content})
-                                + b"\n"
+                    if line.startswith("data: ") and line != "data: [DONE]":
+                        try:
+                            json_line = line[6:]  # Remove "data: " prefix
+                            data = orjson.loads(json_line)
+                            content = (
+                                data.get("choices", [{}])[0]
+                                .get("delta", {})
+                                .get("content", "")
                             )
-                    except Exception:
-                        pass
+                            if content:
+                                model_msg = {"role": "model", "content": content}
+                                yield orjson.dumps(model_msg) + b"\n"
+                        except Exception:
+                            pass


+# FastAPI dependency
 def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
-    return StreamLLMService(base_url=base_url)
+    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
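
Usage sketch (not part of this commit): the "# FastAPI dependency" comment suggests get_llm_service is meant to be injected into a route. The snippet below assumes a FastAPI app with a /chat/ route taking a form-encoded prompt field, matching the client that tests/chat.py used to contain; the route path and field name are assumptions, not taken from the diff.

# Hypothetical wiring, not part of this commit.
from fastapi import Depends, FastAPI, Form
from fastapi.responses import StreamingResponse

from app.services.llm import StreamLLMService, get_llm_service

app = FastAPI()

@app.post("/chat/")
async def chat(
    prompt: str = Form(...),
    llm: StreamLLMService = Depends(get_llm_service),
) -> StreamingResponse:
    # stream_chat yields orjson-encoded messages, one per line (NDJSON).
    return StreamingResponse(llm.stream_chat(prompt), media_type="application/x-ndjson")

Because get_llm_service takes base_url as an optional parameter, FastAPI would expose it as an optional query parameter when used with Depends; omitting it falls back to the localhost Ollama default added in this commit.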

compose.yml

Lines changed: 3 additions & 0 deletions
@@ -1,6 +1,7 @@
 services:
   app:
     container_name: fsap_app
+    network_mode: host
     build: .
     env_file:
       - .env
@@ -22,6 +23,7 @@ services:

   db:
     container_name: fsap_db
+    network_mode: host
     build:
       context: ./db
       dockerfile: Dockerfile
@@ -46,6 +48,7 @@

   inmemory:
     image: redis:latest
+    network_mode: host
     container_name: fsap_inmemory
     ports:
       - "6379:6379"
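
With network_mode: host, each container shares the host's network stack (on Linux), so the services and the host's Ollama server are all reachable on localhost; that is why .env now points POSTGRES_HOST and REDIS_HOST at localhost, and why the existing ports: mapping on inmemory is effectively ignored under host networking. A quick sanity check after docker compose up, a sketch assuming the default ports from .env plus Ollama's default 11434:

# Hypothetical check, not part of this commit: verify that host networking
# exposes Postgres, Redis, and Ollama on localhost.
import socket

def is_open(port: int, host: str = "localhost", timeout: float = 2.0) -> bool:
    """Return True if a TCP connection to host:port succeeds."""
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

for name, port in [("postgres", 5432), ("redis", 6379), ("ollama", 11434)]:
    status = "reachable" if is_open(port) else "unreachable"
    print(f"{name}: localhost:{port} is {status}")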

tests/chat.py

Lines changed: 44 additions & 24 deletions
@@ -1,33 +1,53 @@
-import anyio
+from typing import Optional, AsyncGenerator
+
 import httpx
 import orjson

-API_URL = "http://localhost:8000/chat/"
-

-async def chat_with_endpoint():
-    async with httpx.AsyncClient() as client:
-        while True:
-            prompt = input("\nYou: ")
-            if prompt.lower() == "exit":
-                break
+class StreamLLMService:
+    def __init__(self, base_url: str = "http://localhost:11434/v1"):
+        self.base_url = base_url
+        self.model = "llama3.2"

-            print("\nModel: ", end="", flush=True)
-            try:
-                async with client.stream(
-                    "POST", API_URL, data={"prompt": prompt}, timeout=60
-                ) as response:
-                    async for chunk in response.aiter_lines():
-                        if not chunk:
-                            continue
+    async def stream_chat(self, prompt: str) -> AsyncGenerator[bytes, None]:
+        """Stream chat completion responses from LLM."""
+        # Send user message first
+        user_msg = {
+            "role": "user",
+            "content": prompt,
+        }
+        yield orjson.dumps(user_msg) + b"\n"

+        # Open client as context manager and stream responses
+        async with httpx.AsyncClient(base_url=self.base_url) as client:
+            async with client.stream(
+                "POST",
+                "/chat/completions",
+                json={
+                    "model": self.model,
+                    "messages": [{"role": "user", "content": prompt}],
+                    "stream": True,
+                },
+                timeout=60.0,
+            ) as response:
+                async for line in response.aiter_lines():
+                    print(line)
+                    if line.startswith("data: ") and line != "data: [DONE]":
                         try:
-                            print(orjson.loads(chunk)["content"], end="", flush=True)
-                        except Exception as e:
-                            print(f"\nError parsing chunk: {e}")
-            except httpx.RequestError as e:
-                print(f"\nConnection error: {e}")
+                            json_line = line[6:]  # Remove "data: " prefix
+                            data = orjson.loads(json_line)
+                            content = (
+                                data.get("choices", [{}])[0]
+                                .get("delta", {})
+                                .get("content", "")
+                            )
+                            if content:
+                                model_msg = {"role": "model", "content": content}
+                                yield orjson.dumps(model_msg) + b"\n"
+                        except Exception:
+                            pass


-if __name__ == "__main__":
-    anyio.run(chat_with_endpoint)
+# FastAPI dependency
+def get_llm_service(base_url: Optional[str] = None) -> StreamLLMService:
+    return StreamLLMService(base_url=base_url or "http://localhost:11434/v1")
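
The interactive client (anyio.run(chat_with_endpoint)) is gone, so this file now has no entrypoint; it duplicates StreamLLMService with an extra print(line) that echoes each raw SSE line for debugging. One possible way to exercise it directly, assuming Ollama is serving llama3.2 on localhost:11434 and the module is importable as tests.chat (an assumption about the project layout):

# Hypothetical runner, not part of this commit.
import asyncio

from tests.chat import get_llm_service

async def main() -> None:
    service = get_llm_service()
    async for chunk in service.stream_chat("Say hello in one sentence."):
        # Each chunk is one orjson-encoded message terminated by b"\n".
        print(chunk.decode().rstrip())

if __name__ == "__main__":
    asyncio.run(main())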
