Add langchain standard integration tests

cbornet · cbornet · commit 66415298af64 · 2025-08-04T19:08:00.000+02:00
diff --git a/langchain/pyproject.toml b/langchain/pyproject.toml
@@ -34,6 +34,7 @@ dev = [
     "mypy>=1.13.0",
     "pytest>=8.3.3",
     "ruff>=0.9.0,<0.10",
+    "langchain-tests>=0.3.20",
 ]
 
 [tool.ruff.lint]
diff --git a/langchain/tests/conftest.py b/langchain/tests/conftest.py
@@ -0,0 +1,155 @@
+import json
+import logging
+import os
+from collections.abc import Iterator
+from pathlib import Path
+from typing import Literal
+
+import pytest
+import urllib3
+import vectorize_client as v
+from vectorize_client import ApiClient
+
+
+@pytest.fixture(scope="session")
+def api_token() -> str:
+    """Provide the Vectorize API token read from ``VECTORIZE_TOKEN``.
+
+    Raises:
+        ValueError: If the variable is unset or empty.
+    """
+    token = os.environ.get("VECTORIZE_TOKEN")
+    if token:
+        return token
+    msg = "Please set the VECTORIZE_TOKEN environment variable"
+    raise ValueError(msg)
+
+
+@pytest.fixture(scope="session")
+def org_id() -> str:
+    """Provide the Vectorize organization id read from ``VECTORIZE_ORG``.
+
+    Raises:
+        ValueError: If the variable is unset or empty.
+    """
+    org = os.environ.get("VECTORIZE_ORG")
+    if org:
+        return org
+    msg = "Please set the VECTORIZE_ORG environment variable"
+    raise ValueError(msg)
+
+
+@pytest.fixture(scope="session")
+def environment() -> Literal["prod", "dev", "local", "staging"]:
+    """Provide the target environment named by ``VECTORIZE_ENV``.
+
+    Falls back to ``"prod"`` when the variable is not set.
+
+    Raises:
+        ValueError: If the value is not one of the supported environments.
+    """
+    supported = ("prod", "dev", "local", "staging")
+    env = os.environ.get("VECTORIZE_ENV", "prod")
+    if env in supported:
+        return env
+    msg = "Invalid VECTORIZE_ENV environment variable."
+    raise ValueError(msg)
+
+
+@pytest.fixture(scope="session")
+def api_client(api_token: str, environment: str) -> Iterator[ApiClient]:
+    """Yield a session-scoped ``ApiClient`` configured for ``environment``.
+
+    The client is used as a context manager, so it is closed when the test
+    session ends.
+    """
+    hosts = {
+        "prod": "https://api.vectorize.io/v1",
+        "dev": "https://api-dev.vectorize.io/v1",
+        "local": "http://localhost:3000/api",
+    }
+    # Any value not listed above is routed to the staging host.
+    host = hosts.get(environment, "https://api-staging.vectorize.io/v1")
+
+    # Only the local environment hands the token to the client a second time,
+    # through its header_name / header_value arguments.
+    is_local = environment == "local"
+    header_name = "x-lambda-api-key" if is_local else None
+    header_value = api_token if is_local else None
+
+    configuration = v.Configuration(host=host, access_token=api_token, debug=True)
+    with v.ApiClient(configuration, header_name, header_value) as api:
+        yield api
+
+
+@pytest.fixture(scope="session")
+def pipeline_id(api_client: v.ApiClient, org_id: str) -> Iterator[str]:
+    """Create a throwaway pipeline fed by ``research.pdf`` and yield its id.
+
+    The fixture creates a file-upload source connector, uploads
+    ``research.pdf`` (located next to this file) to it, looks up the AI
+    platform and destination connectors whose type is ``"VECTORIZE"``, and
+    creates a manually scheduled pipeline wiring the three together. After the
+    session the pipeline is deleted on a best-effort basis: a failure is
+    logged, not raised.
+
+    Raises:
+        ValueError: If the file upload does not answer with HTTP 200.
+        RuntimeError: If no ``"VECTORIZE"`` AI platform or destination
+            connector exists in the organization.
+    """
+    pipelines = v.PipelinesApi(api_client)
+
+    connectors_api = v.ConnectorsApi(api_client)
+    response = connectors_api.create_source_connector(
+        org_id,
+        [
+            v.CreateSourceConnector(
+                name="from api", type=v.SourceConnectorType.FILE_UPLOAD
+            )
+        ],
+    )
+    source_connector_id = response.connectors[0].id
+    logging.info("Created source connector %s", source_connector_id)
+
+    uploads_api = v.UploadsApi(api_client)
+    upload_response = uploads_api.start_file_upload_to_connector(
+        org_id,
+        source_connector_id,
+        v.StartFileUploadToConnectorRequest(
+            name="research.pdf",
+            content_type="application/pdf",
+            metadata=json.dumps({"created-from-api": True}),
+        ),
+    )
+
+    file_path = Path(__file__).parent / "research.pdf"
+
+    # The pool manager is a context manager; leaving the block releases its
+    # pooled connections instead of keeping them for the whole session.
+    with urllib3.PoolManager() as http, file_path.open("rb") as f:
+        http_response = http.request(
+            "PUT",
+            upload_response.upload_url,
+            body=f,
+            headers={
+                "Content-Type": "application/pdf",
+                "Content-Length": str(file_path.stat().st_size),
+            },
+        )
+    if http_response.status != 200:
+        # Include the status so a failed run can be diagnosed from the log.
+        msg = f"Upload failed: HTTP {http_response.status}"
+        raise ValueError(msg)
+    logging.info("Upload successful")
+
+    # next() is given a default on purpose: a bare StopIteration raised inside
+    # a generator is converted into an opaque RuntimeError (PEP 479).
+    ai_platforms = connectors_api.get_ai_platform_connectors(org_id)
+    builtin_ai_platform = next(
+        (c.id for c in ai_platforms.ai_platform_connectors if c.type == "VECTORIZE"),
+        None,
+    )
+    if builtin_ai_platform is None:
+        msg = "No AI platform connector of type VECTORIZE found"
+        raise RuntimeError(msg)
+    logging.info("Using AI platform %s", builtin_ai_platform)
+
+    vector_databases = connectors_api.get_destination_connectors(org_id)
+    builtin_vector_db = next(
+        (
+            c.id
+            for c in vector_databases.destination_connectors
+            if c.type == "VECTORIZE"
+        ),
+        None,
+    )
+    if builtin_vector_db is None:
+        msg = "No destination connector of type VECTORIZE found"
+        raise RuntimeError(msg)
+    logging.info("Using destination connector %s", builtin_vector_db)
+
+    pipeline_response = pipelines.create_pipeline(
+        org_id,
+        v.PipelineConfigurationSchema(
+            source_connectors=[
+                v.SourceConnectorSchema(
+                    id=source_connector_id,
+                    type=v.SourceConnectorType.FILE_UPLOAD,
+                    config={},
+                )
+            ],
+            destination_connector=v.DestinationConnectorSchema(
+                id=builtin_vector_db,
+                type=v.DestinationConnectorType.VECTORIZE,
+                config={},
+            ),
+            ai_platform=v.AIPlatformSchema(
+                id=builtin_ai_platform,
+                type=v.AIPlatformType.VECTORIZE,
+                config=v.AIPlatformConfigSchema(),
+            ),
+            pipeline_name="Test pipeline",
+            schedule=v.ScheduleSchema(type=v.ScheduleSchemaType.MANUAL),
+        ),
+    )
+    pipeline_id = pipeline_response.data.id
+    logging.info("Created pipeline %s", pipeline_id)
+
+    yield pipeline_id
+
+    # Teardown is best effort: a cleanup failure must not fail the session.
+    try:
+        pipelines.delete_pipeline(org_id, pipeline_id)
+    except Exception:
+        logging.exception("Failed to delete pipeline %s", pipeline_id)
diff --git a/langchain/tests/test_langchain_integration_tests.py b/langchain/tests/test_langchain_integration_tests.py
@@ -0,0 +1,39 @@
+from typing import Literal
+
+import pytest
+from langchain_tests.integration_tests import RetrieversIntegrationTests
+
+from langchain_vectorize import VectorizeRetriever
+
+
+class TestParrotRetrieverIntegration(RetrieversIntegrationTests):
+    """Run ``RetrieversIntegrationTests`` against ``VectorizeRetriever``."""
+
+    # Assigned once per class by the autouse ``setup`` fixture below.
+    environment: Literal["prod", "dev", "local", "staging"]
+    api_token: str
+    org_id: str
+    pipeline_id: str
+
+    @pytest.fixture(autouse=True, scope="class")
+    @classmethod
+    def setup(
+        cls,
+        environment: Literal["prod", "dev", "local", "staging"],
+        api_token: str,
+        org_id: str,
+        pipeline_id: str,
+    ) -> None:
+        """Copy the session fixtures onto the class for the properties to read."""
+        cls.environment, cls.api_token = environment, api_token
+        cls.org_id, cls.pipeline_id = org_id, pipeline_id
+
+    @property
+    def retriever_constructor(self) -> type[VectorizeRetriever]:
+        """Return the retriever class under test."""
+        return VectorizeRetriever
+
+    @property
+    def retriever_constructor_params(self) -> dict:
+        """Return the keyword arguments used to build the retriever."""
+        params = {
+            "environment": self.environment,
+            "api_token": self.api_token,
+            "organization": self.org_id,
+            "pipeline_id": self.pipeline_id,
+        }
+        return params
+
+    @property
+    def retriever_query_example(self) -> str:
+        """Return the sample query string."""
+        return "What are you?"
diff --git a/langchain/tests/test_retrievers.py b/langchain/tests/test_retrievers.py
@@ -1,163 +1,9 @@
-import json
-import logging
-import os
 import time
-from collections.abc import Iterator
-from pathlib import Path
 from typing import Literal
 
-import pytest
-import urllib3
-import vectorize_client as v
-from vectorize_client import ApiClient
-
 from langchain_vectorize.retrievers import VectorizeRetriever
 
 
-@pytest.fixture(scope="session")
-def api_token() -> str:
-    token = os.getenv("VECTORIZE_TOKEN")
-    if not token:
-        msg = "Please set the VECTORIZE_TOKEN environment variable"
-        raise ValueError(msg)
-    return token
-
-
-@pytest.fixture(scope="session")
-def org_id() -> str:
-    org = os.getenv("VECTORIZE_ORG")
-    if not org:
-        msg = "Please set the VECTORIZE_ORG environment variable"
-        raise ValueError(msg)
-    return org
-
-
-@pytest.fixture(scope="session")
-def environment() -> Literal["prod", "dev", "local", "staging"]:
-    env = os.getenv("VECTORIZE_ENV", "prod")
-    if env not in ["prod", "dev", "local", "staging"]:
-        msg = "Invalid VECTORIZE_ENV environment variable."
-        raise ValueError(msg)
-    return env
-
-
-@pytest.fixture(scope="session")
-def api_client(api_token: str, environment: str) -> Iterator[ApiClient]:
-    header_name = None
-    header_value = None
-    if environment == "prod":
-        host = "https://api.vectorize.io/v1"
-    elif environment == "dev":
-        host = "https://api-dev.vectorize.io/v1"
-    elif environment == "local":
-        host = "http://localhost:3000/api"
-        header_name = "x-lambda-api-key"
-        header_value = api_token
-    else:
-        host = "https://api-staging.vectorize.io/v1"
-
-    with v.ApiClient(
-        v.Configuration(host=host, access_token=api_token, debug=True),
-        header_name,
-        header_value,
-    ) as api:
-        yield api
-
-
-@pytest.fixture(scope="session")
-def pipeline_id(api_client: v.ApiClient, org_id: str) -> Iterator[str]:
-    pipelines = v.PipelinesApi(api_client)
-
-    connectors_api = v.ConnectorsApi(api_client)
-    response = connectors_api.create_source_connector(
-        org_id,
-        [
-            v.CreateSourceConnector(
-                name="from api", type=v.SourceConnectorType.FILE_UPLOAD
-            )
-        ],
-    )
-    source_connector_id = response.connectors[0].id
-    logging.info("Created source connector %s", source_connector_id)
-
-    uploads_api = v.UploadsApi(api_client)
-    upload_response = uploads_api.start_file_upload_to_connector(
-        org_id,
-        source_connector_id,
-        v.StartFileUploadToConnectorRequest(
-            name="research.pdf",
-            content_type="application/pdf",
-            metadata=json.dumps({"created-from-api": True}),
-        ),
-    )
-
-    http = urllib3.PoolManager()
-    this_dir = Path(__file__).parent
-    file_path = this_dir / "research.pdf"
-
-    with file_path.open("rb") as f:
-        http_response = http.request(
-            "PUT",
-            upload_response.upload_url,
-            body=f,
-            headers={
-                "Content-Type": "application/pdf",
-                "Content-Length": str(file_path.stat().st_size),
-            },
-        )
-    if http_response.status != 200:
-        msg = "Upload failed:"
-        raise ValueError(msg)
-    else:
-        logging.info("Upload successful")
-
-    ai_platforms = connectors_api.get_ai_platform_connectors(org_id)
-    builtin_ai_platform = next(
-        c.id for c in ai_platforms.ai_platform_connectors if c.type == "VECTORIZE"
-    )
-    logging.info("Using AI platform %s", builtin_ai_platform)
-
-    vector_databases = connectors_api.get_destination_connectors(org_id)
-    builtin_vector_db = next(
-        c.id for c in vector_databases.destination_connectors if c.type == "VECTORIZE"
-    )
-    logging.info("Using destination connector %s", builtin_vector_db)
-
-    pipeline_response = pipelines.create_pipeline(
-        org_id,
-        v.PipelineConfigurationSchema(
-            source_connectors=[
-                v.SourceConnectorSchema(
-                    id=source_connector_id,
-                    type=v.SourceConnectorType.FILE_UPLOAD,
-                    config={},
-                )
-            ],
-            destination_connector=v.DestinationConnectorSchema(
-                id=builtin_vector_db,
-                type=v.DestinationConnectorType.VECTORIZE,
-                config={},
-            ),
-            ai_platform=v.AIPlatformSchema(
-                id=builtin_ai_platform,
-                type=v.AIPlatformType.VECTORIZE,
-                config=v.AIPlatformConfigSchema(),
-            ),
-            pipeline_name="Test pipeline",
-            schedule=v.ScheduleSchema(type=v.ScheduleSchemaType.MANUAL),
-        ),
-    )
-    pipeline_id = pipeline_response.data.id
-    logging.info("Created pipeline %s", pipeline_id)
-
-    yield pipeline_id
-
-    try:
-        pipelines.delete_pipeline(org_id, pipeline_id)
-    except Exception:
-        logging.exception("Failed to delete pipeline %s", pipeline_id)
-
-
 def test_retrieve_init_args(
     environment: Literal["prod", "dev", "local", "staging"],
     api_token: str,
diff --git a/langchain/uv.lock b/langchain/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -34,6 +34,7 @@ dev = [`
`34`	`34`	`"mypy>=1.13.0",`
`35`	`35`	`"pytest>=8.3.3",`
`36`	`36`	`"ruff>=0.9.0,<0.10",`
	`37`	`+ "langchain-tests>=0.3.20",`
`37`	`38`	`]`
`38`	`39`
`39`	`40`	`[tool.ruff.lint]`