|
1 | | -import json |
2 | | -import logging |
3 | | -import os |
4 | 1 | import time |
5 | | -from collections.abc import Iterator |
6 | | -from pathlib import Path |
7 | 2 | from typing import Literal |
8 | 3 |
|
9 | | -import pytest |
10 | | -import urllib3 |
11 | | -import vectorize_client as v |
12 | | -from vectorize_client import ApiClient |
13 | | - |
14 | 4 | from langchain_vectorize.retrievers import VectorizeRetriever |
15 | 5 |
|
16 | 6 |
|
17 | | -@pytest.fixture(scope="session") |
18 | | -def api_token() -> str: |
19 | | - token = os.getenv("VECTORIZE_TOKEN") |
20 | | - if not token: |
21 | | - msg = "Please set the VECTORIZE_TOKEN environment variable" |
22 | | - raise ValueError(msg) |
23 | | - return token |
24 | | - |
25 | | - |
26 | | -@pytest.fixture(scope="session") |
27 | | -def org_id() -> str: |
28 | | - org = os.getenv("VECTORIZE_ORG") |
29 | | - if not org: |
30 | | - msg = "Please set the VECTORIZE_ORG environment variable" |
31 | | - raise ValueError(msg) |
32 | | - return org |
33 | | - |
34 | | - |
35 | | -@pytest.fixture(scope="session") |
36 | | -def environment() -> Literal["prod", "dev", "local", "staging"]: |
37 | | - env = os.getenv("VECTORIZE_ENV", "prod") |
38 | | - if env not in ["prod", "dev", "local", "staging"]: |
39 | | - msg = "Invalid VECTORIZE_ENV environment variable." |
40 | | - raise ValueError(msg) |
41 | | - return env |
42 | | - |
43 | | - |
44 | | -@pytest.fixture(scope="session") |
45 | | -def api_client(api_token: str, environment: str) -> Iterator[ApiClient]: |
46 | | - header_name = None |
47 | | - header_value = None |
48 | | - if environment == "prod": |
49 | | - host = "https://api.vectorize.io/v1" |
50 | | - elif environment == "dev": |
51 | | - host = "https://api-dev.vectorize.io/v1" |
52 | | - elif environment == "local": |
53 | | - host = "http://localhost:3000/api" |
54 | | - header_name = "x-lambda-api-key" |
55 | | - header_value = api_token |
56 | | - else: |
57 | | - host = "https://api-staging.vectorize.io/v1" |
58 | | - |
59 | | - with v.ApiClient( |
60 | | - v.Configuration(host=host, access_token=api_token, debug=True), |
61 | | - header_name, |
62 | | - header_value, |
63 | | - ) as api: |
64 | | - yield api |
65 | | - |
66 | | - |
67 | | -@pytest.fixture(scope="session") |
68 | | -def pipeline_id(api_client: v.ApiClient, org_id: str) -> Iterator[str]: |
69 | | - pipelines = v.PipelinesApi(api_client) |
70 | | - |
71 | | - connectors_api = v.ConnectorsApi(api_client) |
72 | | - response = connectors_api.create_source_connector( |
73 | | - org_id, |
74 | | - [ |
75 | | - v.CreateSourceConnector( |
76 | | - name="from api", type=v.SourceConnectorType.FILE_UPLOAD |
77 | | - ) |
78 | | - ], |
79 | | - ) |
80 | | - source_connector_id = response.connectors[0].id |
81 | | - logging.info("Created source connector %s", source_connector_id) |
82 | | - |
83 | | - uploads_api = v.UploadsApi(api_client) |
84 | | - upload_response = uploads_api.start_file_upload_to_connector( |
85 | | - org_id, |
86 | | - source_connector_id, |
87 | | - v.StartFileUploadToConnectorRequest( |
88 | | - name="research.pdf", |
89 | | - content_type="application/pdf", |
90 | | - metadata=json.dumps({"created-from-api": True}), |
91 | | - ), |
92 | | - ) |
93 | | - |
94 | | - http = urllib3.PoolManager() |
95 | | - this_dir = Path(__file__).parent |
96 | | - file_path = this_dir / "research.pdf" |
97 | | - |
98 | | - with file_path.open("rb") as f: |
99 | | - http_response = http.request( |
100 | | - "PUT", |
101 | | - upload_response.upload_url, |
102 | | - body=f, |
103 | | - headers={ |
104 | | - "Content-Type": "application/pdf", |
105 | | - "Content-Length": str(file_path.stat().st_size), |
106 | | - }, |
107 | | - ) |
108 | | - if http_response.status != 200: |
109 | | - msg = "Upload failed:" |
110 | | - raise ValueError(msg) |
111 | | - else: |
112 | | - logging.info("Upload successful") |
113 | | - |
114 | | - ai_platforms = connectors_api.get_ai_platform_connectors(org_id) |
115 | | - builtin_ai_platform = next( |
116 | | - c.id for c in ai_platforms.ai_platform_connectors if c.type == "VECTORIZE" |
117 | | - ) |
118 | | - logging.info("Using AI platform %s", builtin_ai_platform) |
119 | | - |
120 | | - vector_databases = connectors_api.get_destination_connectors(org_id) |
121 | | - builtin_vector_db = next( |
122 | | - c.id for c in vector_databases.destination_connectors if c.type == "VECTORIZE" |
123 | | - ) |
124 | | - logging.info("Using destination connector %s", builtin_vector_db) |
125 | | - |
126 | | - pipeline_response = pipelines.create_pipeline( |
127 | | - org_id, |
128 | | - v.PipelineConfigurationSchema( |
129 | | - source_connectors=[ |
130 | | - v.SourceConnectorSchema( |
131 | | - id=source_connector_id, |
132 | | - type=v.SourceConnectorType.FILE_UPLOAD, |
133 | | - config={}, |
134 | | - ) |
135 | | - ], |
136 | | - destination_connector=v.DestinationConnectorSchema( |
137 | | - id=builtin_vector_db, |
138 | | - type=v.DestinationConnectorType.VECTORIZE, |
139 | | - config={}, |
140 | | - ), |
141 | | - ai_platform=v.AIPlatformSchema( |
142 | | - id=builtin_ai_platform, |
143 | | - type=v.AIPlatformType.VECTORIZE, |
144 | | - config=v.AIPlatformConfigSchema(), |
145 | | - ), |
146 | | - pipeline_name="Test pipeline", |
147 | | - schedule=v.ScheduleSchema(type=v.ScheduleSchemaType.MANUAL), |
148 | | - ), |
149 | | - ) |
150 | | - pipeline_id = pipeline_response.data.id |
151 | | - logging.info("Created pipeline %s", pipeline_id) |
152 | | - |
153 | | - yield pipeline_id |
154 | | - |
155 | | - try: |
156 | | - pipelines.delete_pipeline(org_id, pipeline_id) |
157 | | - except Exception: |
158 | | - logging.exception("Failed to delete pipeline %s", pipeline_id) |
159 | | - |
160 | | - |
161 | 7 | def test_retrieve_init_args( |
162 | 8 | environment: Literal["prod", "dev", "local", "staging"], |
163 | 9 | api_token: str, |
|
0 commit comments