Skip to content

Commit bbb7430

Browse files
committed
create corpus for weaviate
1 parent c0533bc commit bbb7430

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from typing import Optional
17+
18+
from vertexai.preview.rag import RagCorpus
19+
20+
# Project used by vertexai.init() below; None when GOOGLE_CLOUD_PROJECT is unset.
PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
21+
22+
23+
def create_corpus_weaviate(
    weaviate_http_endpoint: str,
    weaviate_collection_name: str,
    weaviate_api_key_secret_manager_version: str,
    display_name: Optional[str] = None,
    description: Optional[str] = None,
) -> RagCorpus:
    """Create a RAG corpus that uses a Weaviate collection as its vector DB.

    Initializes Vertex AI for ``PROJECT_ID`` in ``us-central1``, builds an
    embedding model config and a Weaviate vector DB config, then calls
    ``rag.create_corpus`` with both.

    Args:
        weaviate_http_endpoint: Forwarded to ``rag.Weaviate`` as
            ``weaviate_http_endpoint``.
        weaviate_collection_name: Forwarded to ``rag.Weaviate`` as
            ``collection_name``.
        weaviate_api_key_secret_manager_version: Forwarded to ``rag.Weaviate``
            as ``api_key``. The placeholder below uses the form
            ``projects/{PROJECT_ID}/secrets/{SECRET_NAME}/versions/latest``.
        display_name: Optional display name passed to ``rag.create_corpus``.
        description: Optional description passed to ``rag.create_corpus``.

    Returns:
        The value returned by ``rag.create_corpus``; it is also printed.
    """
    # [START generativeaionvertexai_rag_create_corpus_weaviate]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # PROJECT_ID = "your-project-id"
    # weaviate_http_endpoint = "weaviate-http-endpoint"
    # weaviate_collection_name = "weaviate-collection-name"
    # weaviate_api_key_secret_manager_version = "projects/{PROJECT_ID}/secrets/{SECRET_NAME}/versions/latest"
    # display_name = "test_corpus"
    # description = "Corpus Description"

    # Initialize Vertex AI API once per session
    vertexai.init(project=PROJECT_ID, location="us-central1")

    # Embedding model for the corpus (Optional)
    embedding_config = rag.EmbeddingModelConfig(
        publisher_model="publishers/google/models/text-embedding-004"
    )

    # Weaviate instance that backs the corpus as its vector DB
    weaviate_db = rag.Weaviate(
        weaviate_http_endpoint=weaviate_http_endpoint,
        collection_name=weaviate_collection_name,
        api_key=weaviate_api_key_secret_manager_version,
    )

    rag_corpus = rag.create_corpus(
        display_name=display_name,
        description=description,
        embedding_model_config=embedding_config,
        vector_db=weaviate_db,
    )
    print(rag_corpus)
    # Example response:
    # RagCorpus(name='projects/1234567890/locations/us-central1/ragCorpora/1234567890',
    # display_name='test_corpus', description='Corpus Description', embedding_model_config=...
    # ...

    # [END generativeaionvertexai_rag_create_corpus_weaviate]
    return rag_corpus
72+
73+
74+
if __name__ == "__main__":
    # Placeholder arguments for running the sample as a script; replace them
    # with real values before use.
    sample_arguments = {
        "weaviate_http_endpoint": "weaviate-http-endpoint",
        "weaviate_collection_name": "weaviate-collection-name",
        "weaviate_api_key_secret_manager_version": "projects/{PROJECT_ID}/secrets/{SECRET_NAME}/versions/latest",
        "display_name": "test_corpus",
        "description": "Corpus Description",
    }
    create_corpus_weaviate(**sample_arguments)

0 commit comments

Comments
 (0)