Commit 89ec5a7

feat(vector_io): Implement Contextual Retrieval for improved RAG search quality
1 parent 4f8bf45 commit 89ec5a7

5 files changed: +206 additions, -6 deletions

.stats.yml

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 108
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-373eb8eb3cc02e6f8a9fa33079a5e735886fbf62958ee83e3cdef7bb4c41be37.yml
-openapi_spec_hash: fe1fa50161da4f095d128b0de7787e96
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/llamastack%2Fllama-stack-client-621e8b8ef37d5ebf024fe3bf6a59486a90debf01acca2c9bb4e9032e2dff92d3.yml
+openapi_spec_hash: 51f623cd3ea4addf8f939dd4ef8962c8
 config_hash: 6aa61d4143c3e3df785972c0287d1370

src/llama_stack_client/types/vector_store_create_params.py

Lines changed: 50 additions & 1 deletion
@@ -19,6 +19,8 @@
     "ChunkingStrategyVectorStoreChunkingStrategyAuto",
     "ChunkingStrategyVectorStoreChunkingStrategyStatic",
     "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextual",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextualContextual",
 ]

@@ -58,6 +60,53 @@ class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False):
     type: Literal["static"]


+class ChunkingStrategyVectorStoreChunkingStrategyContextualContextual(TypedDict, total=False):
+    """Configuration for contextual chunking."""
+
+    chunk_overlap_tokens: int
+    """Tokens to overlap between adjacent chunks.
+
+    Must be less than max_chunk_size_tokens.
+    """
+
+    context_prompt: str
+    """Prompt template for contextual retrieval.
+
+    Uses WHOLE_DOCUMENT and CHUNK_CONTENT placeholders wrapped in double curly
+    braces.
+    """
+
+    max_chunk_size_tokens: int
+    """Maximum tokens per chunk. Suggested ~700 to allow room for prepended context."""
+
+    max_concurrency: Optional[int]
+    """Maximum concurrent LLM calls. Falls back to config default if not provided."""
+
+    model_id: Optional[str]
+    """LLM model for generating context.
+
+    Falls back to VectorStoresConfig.contextual_retrieval_params.model if not
+    provided.
+    """
+
+    timeout_seconds: Optional[int]
+    """Timeout per LLM call in seconds. Falls back to config default if not provided."""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyContextual(TypedDict, total=False):
+    """
+    Contextual chunking strategy that uses an LLM to situate chunks within the document.
+    """
+
+    contextual: Required[ChunkingStrategyVectorStoreChunkingStrategyContextualContextual]
+    """Configuration for contextual chunking."""
+
+    type: Literal["contextual"]
+    """Strategy type identifier."""
+
+
 ChunkingStrategy: TypeAlias = Union[
-    ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic
+    ChunkingStrategyVectorStoreChunkingStrategyAuto,
+    ChunkingStrategyVectorStoreChunkingStrategyStatic,
+    ChunkingStrategyVectorStoreChunkingStrategyContextual,
 ]
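The net effect for callers is that "contextual" becomes a third accepted chunking strategy at vector store creation time. A minimal usage sketch, assuming a Llama Stack server on a local URL and that vector_stores.create accepts chunking_strategy as in the OpenAI-compatible surface; the base URL and model id are placeholders:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Contextual chunking config mirroring the TypedDicts added in this commit.
# "my-context-model" is a placeholder; omitted fields fall back to config defaults.
vector_store = client.vector_stores.create(
    name="docs",
    chunking_strategy={
        "type": "contextual",
        "contextual": {
            "max_chunk_size_tokens": 700,  # leaves room for the prepended context
            "chunk_overlap_tokens": 100,   # must be < max_chunk_size_tokens
            "model_id": "my-context-model",
        },
    },
)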

src/llama_stack_client/types/vector_stores/file_batch_create_params.py

Lines changed: 50 additions & 1 deletion
@@ -19,6 +19,8 @@
     "ChunkingStrategyVectorStoreChunkingStrategyAuto",
     "ChunkingStrategyVectorStoreChunkingStrategyStatic",
     "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextual",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextualContextual",
 ]

@@ -54,6 +56,53 @@ class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False):
     type: Literal["static"]


+class ChunkingStrategyVectorStoreChunkingStrategyContextualContextual(TypedDict, total=False):
+    """Configuration for contextual chunking."""
+
+    chunk_overlap_tokens: int
+    """Tokens to overlap between adjacent chunks.
+
+    Must be less than max_chunk_size_tokens.
+    """
+
+    context_prompt: str
+    """Prompt template for contextual retrieval.
+
+    Uses WHOLE_DOCUMENT and CHUNK_CONTENT placeholders wrapped in double curly
+    braces.
+    """
+
+    max_chunk_size_tokens: int
+    """Maximum tokens per chunk. Suggested ~700 to allow room for prepended context."""
+
+    max_concurrency: Optional[int]
+    """Maximum concurrent LLM calls. Falls back to config default if not provided."""
+
+    model_id: Optional[str]
+    """LLM model for generating context.
+
+    Falls back to VectorStoresConfig.contextual_retrieval_params.model if not
+    provided.
+    """
+
+    timeout_seconds: Optional[int]
+    """Timeout per LLM call in seconds. Falls back to config default if not provided."""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyContextual(TypedDict, total=False):
+    """
+    Contextual chunking strategy that uses an LLM to situate chunks within the document.
+    """
+
+    contextual: Required[ChunkingStrategyVectorStoreChunkingStrategyContextualContextual]
+    """Configuration for contextual chunking."""
+
+    type: Literal["contextual"]
+    """Strategy type identifier."""
+
+
 ChunkingStrategy: TypeAlias = Union[
-    ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic
+    ChunkingStrategyVectorStoreChunkingStrategyAuto,
+    ChunkingStrategyVectorStoreChunkingStrategyStatic,
+    ChunkingStrategyVectorStoreChunkingStrategyContextual,
 ]
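The context_prompt docstring pins down the placeholder contract: WHOLE_DOCUMENT and CHUNK_CONTENT, wrapped in double curly braces. A sketch of what a custom template could look like; the placeholders come from the field docstring, while the surrounding wording is illustrative rather than the server's default prompt:

# Illustrative template only; the server substitutes the two placeholders
# with the full document text and the current chunk respectively.
context_prompt = (
    "<document>{{WHOLE_DOCUMENT}}</document>\n"
    "Here is the chunk we want to situate within the whole document:\n"
    "<chunk>{{CHUNK_CONTENT}}</chunk>\n"
    "Give a short, succinct context that situates this chunk within the "
    "overall document, to improve search retrieval of the chunk."
)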

src/llama_stack_client/types/vector_stores/file_create_params.py

Lines changed: 50 additions & 1 deletion
@@ -17,6 +17,8 @@
     "ChunkingStrategyVectorStoreChunkingStrategyAuto",
     "ChunkingStrategyVectorStoreChunkingStrategyStatic",
     "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextual",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextualContextual",
 ]

@@ -54,6 +56,53 @@ class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False):
     type: Literal["static"]


+class ChunkingStrategyVectorStoreChunkingStrategyContextualContextual(TypedDict, total=False):
+    """Configuration for contextual chunking."""
+
+    chunk_overlap_tokens: int
+    """Tokens to overlap between adjacent chunks.
+
+    Must be less than max_chunk_size_tokens.
+    """
+
+    context_prompt: str
+    """Prompt template for contextual retrieval.
+
+    Uses WHOLE_DOCUMENT and CHUNK_CONTENT placeholders wrapped in double curly
+    braces.
+    """
+
+    max_chunk_size_tokens: int
+    """Maximum tokens per chunk. Suggested ~700 to allow room for prepended context."""
+
+    max_concurrency: Optional[int]
+    """Maximum concurrent LLM calls. Falls back to config default if not provided."""
+
+    model_id: Optional[str]
+    """LLM model for generating context.
+
+    Falls back to VectorStoresConfig.contextual_retrieval_params.model if not
+    provided.
+    """
+
+    timeout_seconds: Optional[int]
+    """Timeout per LLM call in seconds. Falls back to config default if not provided."""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyContextual(TypedDict, total=False):
+    """
+    Contextual chunking strategy that uses an LLM to situate chunks within the document.
+    """
+
+    contextual: Required[ChunkingStrategyVectorStoreChunkingStrategyContextualContextual]
+    """Configuration for contextual chunking."""
+
+    type: Literal["contextual"]
+    """Strategy type identifier."""
+
+
 ChunkingStrategy: TypeAlias = Union[
-    ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic
+    ChunkingStrategyVectorStoreChunkingStrategyAuto,
+    ChunkingStrategyVectorStoreChunkingStrategyStatic,
+    ChunkingStrategyVectorStoreChunkingStrategyContextual,
 ]
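Because file_create_params.py carries the same union, the strategy can also be set per file when attaching it to a store, overriding the store-level default. A sketch assuming vector_stores.files.create mirrors the params file; the file id and tuning values are placeholders:

# Assumes a previously uploaded file id; the per-file strategy takes
# precedence over the vector store's default chunking strategy.
client.vector_stores.files.create(
    vector_store_id=vector_store.id,
    file_id="file-abc123",  # placeholder id
    chunking_strategy={
        "type": "contextual",
        "contextual": {
            "max_concurrency": 4,    # cap on parallel LLM calls
            "timeout_seconds": 60,   # per-LLM-call timeout
        },
    },
)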

src/llama_stack_client/types/vector_stores/vector_store_file.py

Lines changed: 54 additions & 1 deletion
@@ -9,6 +9,8 @@
 from typing import Dict, Union, Optional
 from typing_extensions import Literal, Annotated, TypeAlias

+from pydantic import Field as FieldInfo
+
 from ..._utils import PropertyInfo
 from ..._models import BaseModel

@@ -18,6 +20,8 @@
     "ChunkingStrategyVectorStoreChunkingStrategyAuto",
     "ChunkingStrategyVectorStoreChunkingStrategyStatic",
     "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextual",
+    "ChunkingStrategyVectorStoreChunkingStrategyContextualContextual",
     "LastError",
 ]

@@ -45,8 +49,57 @@ class ChunkingStrategyVectorStoreChunkingStrategyStatic(BaseModel):
     type: Optional[Literal["static"]] = None


+class ChunkingStrategyVectorStoreChunkingStrategyContextualContextual(BaseModel):
+    """Configuration for contextual chunking."""
+
+    chunk_overlap_tokens: Optional[int] = None
+    """Tokens to overlap between adjacent chunks.
+
+    Must be less than max_chunk_size_tokens.
+    """
+
+    context_prompt: Optional[str] = None
+    """Prompt template for contextual retrieval.
+
+    Uses WHOLE_DOCUMENT and CHUNK_CONTENT placeholders wrapped in double curly
+    braces.
+    """
+
+    max_chunk_size_tokens: Optional[int] = None
+    """Maximum tokens per chunk. Suggested ~700 to allow room for prepended context."""
+
+    max_concurrency: Optional[int] = None
+    """Maximum concurrent LLM calls. Falls back to config default if not provided."""
+
+    api_model_id: Optional[str] = FieldInfo(alias="model_id", default=None)
+    """LLM model for generating context.
+
+    Falls back to VectorStoresConfig.contextual_retrieval_params.model if not
+    provided.
+    """
+
+    timeout_seconds: Optional[int] = None
+    """Timeout per LLM call in seconds. Falls back to config default if not provided."""
+
+
+class ChunkingStrategyVectorStoreChunkingStrategyContextual(BaseModel):
+    """
+    Contextual chunking strategy that uses an LLM to situate chunks within the document.
+    """
+
+    contextual: ChunkingStrategyVectorStoreChunkingStrategyContextualContextual
+    """Configuration for contextual chunking."""
+
+    type: Optional[Literal["contextual"]] = None
+    """Strategy type identifier."""
+
+
 ChunkingStrategy: TypeAlias = Annotated[
-    Union[ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic],
+    Union[
+        ChunkingStrategyVectorStoreChunkingStrategyAuto,
+        ChunkingStrategyVectorStoreChunkingStrategyStatic,
+        ChunkingStrategyVectorStoreChunkingStrategyContextual,
+    ],
     PropertyInfo(discriminator="type"),
 ]
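One asymmetry worth noting in the response model: the wire field model_id is exposed in Python as api_model_id via a pydantic alias (generated SDKs commonly rename such fields to stay clear of pydantic's reserved model_ attribute namespace). A sketch of parsing a payload through the new model, assuming pydantic v2's model_validate; the id is a placeholder:

from llama_stack_client.types.vector_stores.vector_store_file import (
    ChunkingStrategyVectorStoreChunkingStrategyContextualContextual,
)

# The wire payload uses "model_id"; the Python attribute is "api_model_id".
parsed = ChunkingStrategyVectorStoreChunkingStrategyContextualContextual.model_validate(
    {"model_id": "my-context-model"}  # placeholder id
)
print(parsed.api_model_id)  # -> "my-context-model"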
