Skip to content

Commit 76c1ee4

Browse files
author
Bob Strahan
committed
Update S3 Vectors Knowledge Base integration to use IndexArn approach
1 parent 58c4936 commit 76c1ee4

File tree

4 files changed

+81
-55
lines changed

4 files changed

+81
-55
lines changed

options/bedrockkb/src/s3_vectors_manager/handler.py

Lines changed: 51 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -188,8 +188,7 @@ def handle_knowledge_base_resources(event, context, properties):
188188
description = properties.get('Description', '')
189189
role_arn = properties.get('RoleArn', '')
190190
embedding_model_arn = properties.get('EmbeddingModelArn', '')
191-
bucket_arn = properties.get('BucketArn', '')
192-
index_name = properties.get('IndexName', '')
191+
index_arn = properties.get('IndexArn', '') # Now expects IndexArn directly
193192
region = properties.get('Region', '')
194193
kms_key_arn = properties.get('KmsKeyArn', '')
195194

@@ -200,7 +199,7 @@ def handle_knowledge_base_resources(event, context, properties):
200199
logger.info(f"Creating Knowledge Base with S3 Vectors: {kb_name}")
201200
return create_knowledge_base_s3_vectors(
202201
bedrock_agent_client, kb_name, description, role_arn,
203-
embedding_model_arn, bucket_arn, index_name, kms_key_arn
202+
embedding_model_arn, index_arn, kms_key_arn
204203
)
205204

206205
elif request_type == 'Update':
@@ -215,7 +214,7 @@ def handle_knowledge_base_resources(event, context, properties):
215214
# Fallback: create new if we can't find existing
216215
return create_knowledge_base_s3_vectors(
217216
bedrock_agent_client, kb_name, description, role_arn,
218-
embedding_model_arn, bucket_arn, index_name, kms_key_arn
217+
embedding_model_arn, index_arn, kms_key_arn
219218
)
220219

221220
elif request_type == 'Delete':
@@ -246,42 +245,46 @@ def extract_knowledge_base_id_from_physical_id(physical_id):
246245

247246

248247
def create_knowledge_base_s3_vectors(bedrock_agent_client, name, description, role_arn,
249-
embedding_model_arn, bucket_arn, index_name, kms_key_arn=None):
250-
"""Create Knowledge Base with S3 Vectors using Bedrock Agent API."""
248+
embedding_model_arn, index_arn, kms_key_arn=None):
249+
"""Create Knowledge Base with S3 Vectors using Console-proven approach."""
251250
try:
252251
logger.info(f"Creating Knowledge Base: {name} with S3 Vectors")
253-
logger.info(f"Using bucket ARN: {bucket_arn}, index name: {index_name}")
254-
255-
# Build S3 Vectors configuration - use correct parameter names per API documentation
256-
s3_vectors_config = {
257-
'vectorBucketArn': bucket_arn, # Correct parameter name
258-
'indexName': index_name
259-
}
252+
logger.info(f"Using index ARN: {index_arn}")
260253

261-
# Note: KMS encryption is handled at the S3 vector bucket level, not in Knowledge Base config
262-
# The error showed that kmsKeyArn is not supported in s3VectorsConfiguration
263-
if kms_key_arn:
264-
logger.info(f"KMS encryption configured at bucket level with key: {kms_key_arn}")
254+
# Use the working Console payload structure
255+
import time
265256

266257
response = bedrock_agent_client.create_knowledge_base(
258+
clientToken=f"cfn-{int(time.time())}-{'a' * 20}", # 33+ chars required
267259
name=name,
268260
description=description,
269261
roleArn=role_arn,
270262
knowledgeBaseConfiguration={
271263
'type': 'VECTOR',
272264
'vectorKnowledgeBaseConfiguration': {
265+
'embeddingModelConfiguration': {
266+
'bedrockEmbeddingModelConfiguration': {
267+
'dimensions': 1024, # Console uses 1024
268+
'embeddingDataType': 'FLOAT32'
269+
}
270+
},
273271
'embeddingModelArn': embedding_model_arn
274272
}
275273
},
276274
storageConfiguration={
277275
'type': 'S3_VECTORS',
278-
's3VectorsConfiguration': s3_vectors_config
276+
's3VectorsConfiguration': {
277+
'indexArn': index_arn # Use indexArn approach (Console-proven)
278+
}
279279
}
280280
)
281281

282282
kb_id = response['knowledgeBase']['knowledgeBaseId']
283283
logger.info(f"Created Knowledge Base with ID: {kb_id}")
284284

285+
if kms_key_arn:
286+
logger.info(f"KMS encryption configured at bucket level with key: {kms_key_arn}")
287+
285288
return {
286289
'KnowledgeBaseId': kb_id,
287290
'KnowledgeBaseName': name,
@@ -323,7 +326,7 @@ def get_knowledge_base_info(bedrock_agent_client, kb_id):
323326

324327

325328
def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedding_model, kms_key_arn=None):
326-
"""Create S3 Vector bucket with optional KMS encryption. Bedrock will manage the index automatically."""
329+
"""Create S3 Vector bucket and index following Console approach (manual index creation required)."""
327330
try:
328331
# Get region from client for ARN construction
329332
region = s3vectors_client.meta.region_name
@@ -346,43 +349,50 @@ def create_s3_vector_resources(s3vectors_client, bucket_name, index_name, embedd
346349
bucket_response = s3vectors_client.create_vector_bucket(**create_bucket_params)
347350
logger.info(f"Created vector bucket: {bucket_name}")
348351

349-
# Get bucket ARN from response or construct it properly using SANITIZED bucket name
350-
bucket_arn = bucket_response.get('BucketArn')
351-
if not bucket_arn:
352-
# Construct bucket ARN using proper format with account ID and SANITIZED bucket name
353-
try:
354-
# Get account ID for ARN construction
355-
sts_client = boto3.client('sts', region_name=region)
356-
account_id = sts_client.get_caller_identity()['Account']
357-
bucket_arn = f"arn:aws:s3vectors:{region}:{account_id}:bucket/{bucket_name}"
358-
logger.info(f"Constructed bucket ARN with sanitized name: {bucket_arn}")
359-
except Exception as arn_error:
360-
logger.error(f"Could not construct bucket ARN: {arn_error}")
361-
raise arn_error
352+
# Create S3 Vector Index (required for Knowledge Base integration)
353+
logger.info(f"Creating vector index: {index_name}")
354+
355+
index_response = s3vectors_client.create_index(
356+
vectorBucketName=bucket_name,
357+
indexName=index_name,
358+
dataType="float32",
359+
dimension=1024, # Console uses 1024 for Titan v2
360+
distanceMetric="cosine",
361+
metadataConfiguration={
362+
"nonFilterableMetadataKeys": [
363+
"AMAZON_BEDROCK_METADATA",
364+
"AMAZON_BEDROCK_TEXT_CHUNK"
365+
]
366+
}
367+
)
368+
logger.info(f"Created vector index: {index_name}")
369+
370+
# Construct ARNs
371+
sts_client = boto3.client('sts', region_name=region)
372+
account_id = sts_client.get_caller_identity()['Account']
373+
374+
bucket_arn = f"arn:aws:s3vectors:{region}:{account_id}:bucket/{bucket_name}"
375+
index_arn = f"arn:aws:s3vectors:{region}:{account_id}:bucket/{bucket_name}/index/{index_name}"
362376

363377
logger.info(f"Vector bucket ARN: {bucket_arn}")
364-
logger.info(f"Sanitized bucket name used consistently: {bucket_name}")
378+
logger.info(f"Vector index ARN: {index_arn}")
365379

366380
# Validate bucket name one more time before returning
367381
if not is_valid_s3_bucket_name(bucket_name):
368382
raise ValueError(f"Sanitized bucket name is still invalid: {bucket_name}")
369383

370-
# Note: For Bedrock Knowledge Base integration, Bedrock will automatically
371-
# create and manage the vector index when the Knowledge Base is created.
372-
# We don't create the index manually here.
373-
374384
return {
375385
'BucketName': bucket_name, # Return sanitized name
376-
'BucketArn': bucket_arn, # Return proper ARN with sanitized name
377-
'SanitizedBucketName': bucket_name, # Explicit sanitized name for KB creation
378-
'IndexName': index_name, # This will be used by Bedrock
386+
'BucketArn': bucket_arn, # Return proper bucket ARN
387+
'IndexName': index_name, # Index name
388+
'IndexArn': index_arn, # Index ARN for Knowledge Base
379389
'Status': 'Created'
380390
}
381391

382392
except ClientError as e:
383393
error_code = e.response['Error']['Code']
384394
if error_code in ['BucketAlreadyExists', 'ConflictException']:
385-
logger.warning(f"Vector bucket already exists: {e}")
395+
logger.warning(f"Vector resource already exists: {e}")
386396
# Try to get existing resource info
387397
return get_s3_vector_info(s3vectors_client, bucket_name, index_name)
388398
else:
Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1-
# AWS SDK for Python
2-
boto3>=1.34.0
1+
# AWS SDK for Python - Use latest version for S3 Vectors support
2+
boto3>=1.40.25
33

44
# CloudFormation response helper (built into Lambda runtime but useful for local testing)
55
cfnresponse
66

7-
# Optional: For enhanced error handling and logging
8-
botocore>=1.34.0
7+
# Optional: For enhanced error handling and logging
8+
botocore>=1.40.25

options/bedrockkb/template.yaml

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -397,10 +397,10 @@ Resources:
397397
- s3vectors:DeleteVectorBucket
398398
- s3vectors:GetVectorBucket
399399
- s3vectors:ListVectorBuckets
400-
- s3vectors:CreateVectorIndex
401-
- s3vectors:DeleteVectorIndex
402-
- s3vectors:DescribeVectorIndex
403-
- s3vectors:ListVectorIndexes
400+
- s3vectors:CreateIndex
401+
- s3vectors:DeleteIndex
402+
- s3vectors:GetIndex
403+
- s3vectors:ListIndexes
404404
- s3vectors:PutVectors
405405
- s3vectors:GetVectors
406406
- s3vectors:QueryVectors
@@ -714,13 +714,15 @@ Resources:
714714
- s3vectors:QueryVectors
715715
- s3vectors:DeleteVectors
716716
- s3vectors:DescribeVectorIndex
717+
- s3vectors:GetIndex
718+
- s3vectors:ListIndexes
717719
Resource:
720+
- !Sub
721+
- "arn:${AWS::Partition}:s3vectors:${AWS::Region}:${AWS::AccountId}:bucket/${BucketName}"
722+
- BucketName: !GetAtt S3VectorBucketAndIndex.BucketName
718723
- !Sub
719724
- "arn:${AWS::Partition}:s3vectors:${AWS::Region}:${AWS::AccountId}:bucket/${BucketName}/*"
720-
- BucketName: !If
721-
- HasS3VectorBucketName
722-
- !Ref pS3VectorBucketName
723-
- !Sub "${AWS::StackName}-s3-vectors"
725+
- BucketName: !GetAtt S3VectorBucketAndIndex.BucketName
724726
- !Ref "AWS::NoValue"
725727
- !If
726728
- IsS3DataSource
@@ -798,8 +800,7 @@ Resources:
798800
Description: "Bedrock Knowledge Base with S3 Vectors"
799801
RoleArn: !GetAtt KnowledgeBaseServiceRole.Arn
800802
EmbeddingModelArn: !Sub "arn:${AWS::Partition}:bedrock:${AWS::Region}::foundation-model/${pEmbedModel}"
801-
BucketArn: !GetAtt S3VectorBucketAndIndex.BucketArn
802-
IndexName: !Ref pS3VectorIndexName
803+
IndexArn: !GetAtt S3VectorBucketAndIndex.IndexArn
803804
Region: !Ref AWS::Region
804805
KmsKeyArn: !If [IsCustomerManagedKey, !Ref pCustomerManagedEncryptionKeyArn, !Ref "AWS::NoValue"]
805806

template.yaml

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -452,6 +452,7 @@ Conditions:
452452
DocumentSectionsCrawlerScheduleEnabled: !Not [!Equals [!Ref DocumentSectionsCrawlerFrequency, "Manual"]]
453453
HasPermissionsBoundary: !Not [!Equals [!Ref PermissionsBoundaryArn, ""]]
454454
HasCustomConfigPath: !Not [!Equals [!Ref CustomConfigPath, ""]]
455+
IsS3VectorsVectorStore: !Equals [!Ref KnowledgeBaseVectorStore, "S3_VECTORS"]
455456

456457

457458
Metadata:
@@ -1016,6 +1017,20 @@ Resources:
10161017
- kms:GenerateDataKey*
10171018
- kms:DescribeKey
10181019
Resource: "*"
1020+
- !If
1021+
- IsS3VectorsVectorStore
1022+
- Sid: Allow S3 Vectors indexing service to use the key
1023+
Effect: Allow
1024+
Principal:
1025+
Service: !Sub "indexing.s3vectors.${AWS::URLSuffix}"
1026+
Action:
1027+
- kms:Encrypt
1028+
- kms:Decrypt
1029+
- kms:ReEncrypt*
1030+
- kms:GenerateDataKey*
1031+
- kms:DescribeKey
1032+
Resource: "*"
1033+
- !Ref AWS::NoValue
10191034

10201035
CustomerManagedEncryptionKeyAlias:
10211036
Type: AWS::KMS::Alias

0 commit comments

Comments
 (0)