Skip to content

Commit b429769

Browse files
author
Daniel Lorch
committed
chore: filter S3 vectors result by threshold
1 parent 1b1e389 commit b429769

File tree

2 files changed

+22
-2
lines changed

2 files changed

+22
-2
lines changed

notebooks/examples/dynamic-few-shot-lambda/GENAIIDP-dynamic-few-shot.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
S3VECTOR_DIMENSIONS = int(os.environ['S3VECTOR_DIMENSIONS'])
3232
MODEL_ID = os.environ['MODEL_ID']
3333
TOP_K = int(os.environ['TOP_K'])
34+
THRESHOLD = float(os.environ['THRESHOLD'])
3435
except (KeyError, ValueError, IndexError) as e:
3536
logger.error(f"Failed to parse environment variables: {e}")
3637
raise
@@ -64,7 +65,7 @@ def lambda_handler(event, context):
6465
try:
6566
logger.info("=== DYNAMIC FEW-SHOT LAMBDA INVOKED ===")
6667
logger.debug(f"Complete input event: {json.dumps(event, indent=2)}")
67-
68+
6869
# Validate input
6970
class_label = event.get("class_label")
7071
document_text = event.get("document_text")
@@ -78,6 +79,8 @@ def lambda_handler(event, context):
7879
# Decode input data
7980
image_data = _decode_images(image_content)
8081

82+
logger.info(f"=== FIND SIMILAR ITEMS ===")
83+
8184
# Find similar items using S3 vectors lookup from image similarity
8285
result = _s3vectors_find_similar_items(image_data)
8386

@@ -141,7 +144,17 @@ def _s3vectors_find_similar_items(image_data):
141144
# sort results by distance score (lowest to highest - lower is more similar)
142145
sorted_result = sorted(result, key=lambda example: example['distance'], reverse=False)
143146

144-
return sorted_result
147+
# drop results whose distance score exceeds THRESHOLD (lower distance is more similar)
148+
filtered_result = []
149+
for example in sorted_result:
150+
if example['distance'] > THRESHOLD:
151+
logger.info(
152+
f"Skipping example with distance {example['distance']} above threshold {THRESHOLD}: {key}"
153+
)
154+
else:
155+
filtered_result.append(example)
156+
157+
return filtered_result
145158

146159
def _s3vectors_find_similar_items_from_image(page_image):
147160
"""Search for similar items using image query"""

notebooks/examples/dynamic-few-shot-lambda/template.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ Parameters:
3535
TopK:
3636
Type: Number
3737
Default: 2
38+
Description: The number of results to return for each S3 vectors query.
39+
40+
Threshold:
41+
Type: Number
42+
Default: 0.2
43+
Description: Exclude results whose distance score exceeds this threshold (lower distance is more similar)
3844

3945
LambdaFunctionName:
4046
Type: String
@@ -79,6 +85,7 @@ Resources:
7985
S3VECTOR_DIMENSIONS: !Ref VectorDimensions
8086
MODEL_ID: !Ref ModelId
8187
TOP_K: !Ref TopK
88+
THRESHOLD: !Ref Threshold
8289
LoggingConfig:
8390
LogGroup: !Ref DynamicFewShotLogGroup
8491
# Minimal permissions - only needs basic execution and logging

0 commit comments

Comments
 (0)