From c46b83a207099494e2543b76edc496014a8802f2 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 17 Sep 2025 14:37:59 -0700 Subject: [PATCH 1/5] Add configurable REST Catalog to test suite --- tests/integration/test_catalog.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 587c13b35b..e982b2f50a 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +import json +import os from pathlib import Path, PosixPath from typing import Generator, List @@ -74,6 +76,21 @@ def rest_catalog() -> Generator[Catalog, None, None]: clean_up(test_catalog) +@pytest.fixture(scope="function") +def rest_catalog_env() -> Generator[Catalog, None, None]: + properties_json = os.environ.get("PYICEBERG_REST_CATALOG_PROPERTIES") + if properties_json: + properties = json.loads(properties_json) + test_catalog = RestCatalog( + "rest_env", + **properties, + ) + yield test_catalog + clean_up(test_catalog) + else: + pytest.skip("REST catalog environment variables not set") + + @pytest.fixture(scope="function") def hive_catalog() -> Generator[Catalog, None, None]: test_catalog = HiveCatalog( @@ -95,6 +112,7 @@ def hive_catalog() -> Generator[Catalog, None, None]: pytest.lazy_fixture("sqlite_catalog_file"), pytest.lazy_fixture("rest_catalog"), pytest.lazy_fixture("hive_catalog"), + pytest.lazy_fixture("rest_catalog_env"), ] From 6e67ebc5c47bc8b505caf543a5adb64af79159f6 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 18 Sep 2025 15:35:37 -0700 Subject: [PATCH 2/5] address PR comments --- mkdocs/docs/contributing.md | 12 ++++++++++++ tests/integration/test_catalog.py | 3 +-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md index 4ff3541b54..9976b21512 100644 --- a/mkdocs/docs/contributing.md +++ 
b/mkdocs/docs/contributing.md @@ -155,6 +155,18 @@ make test-integration-rebuild To rebuild the containers from scratch. +#### Running Integration Tests against REST Catalogs + +PyIceberg supports the ability to run our catalog tests against an arbitrary REST Catalog. + +To do so, run the catalog integration tests with the `PYICEBERG_REST_CATALOG_PROPERTIES` environment variable. + +```sh +PYICEBERG_REST_CATALOG_PROPERTIES='{"uri": "http://localhost:8181"}' poetry run pytest tests/integration/ +``` + +`PYICEBERG_REST_CATALOG_PROPERTIES` should be a JSON-encoded string that contains any [catalog properties](https://py.iceberg.apache.org/configuration/#rest-catalog) necessary to interact with your REST Catalog implementation. + ## Code standards Below are the formalized conventions that we adhere to in the PyIceberg project. The goal of this is to have a common agreement on how to evolve the codebase, but also using it as guidelines for newcomers to the project. diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index e982b2f50a..5c475564c6 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -78,8 +78,7 @@ def rest_catalog() -> Generator[Catalog, None, None]: @pytest.fixture(scope="function") def rest_catalog_env() -> Generator[Catalog, None, None]: - properties_json = os.environ.get("PYICEBERG_REST_CATALOG_PROPERTIES") - if properties_json: + if properties_json := os.environ.get("PYICEBERG_REST_CATALOG_PROPERTIES"): properties = json.loads(properties_json) test_catalog = RestCatalog( "rest_env", From d0da977aa3700f9c1bf1b60a765ae22b4cd35e51 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Mon, 22 Sep 2025 15:21:46 -0700 Subject: [PATCH 3/5] PR comments --- mkdocs/docs/contributing.md | 17 +++++++++++++---- tests/integration/test_catalog.py | 15 +++++---------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md index 
9976b21512..d2a2252ce3 100644 --- a/mkdocs/docs/contributing.md +++ b/mkdocs/docs/contributing.md @@ -159,13 +159,22 @@ To rebuild the containers from scratch. PyIceberg supports the ability to run our catalog tests against an arbitrary REST Catalog. -To do so, run the catalog integration tests with the `PYICEBERG_REST_CATALOG_PROPERTIES` environment variable. +You can set the test catalog in the ~/.pyiceberg.yaml file: -```sh -PYICEBERG_REST_CATALOG_PROPERTIES='{"uri": "http://localhost:8181"}' poetry run pytest tests/integration/ +```yaml +catalog: + test_catalog: + uri: http://rest-catalog/ws/ + credential: t-1234:secret ``` -`PYICEBERG_REST_CATALOG_PROPERTIES` should be a JSON-encoded string that contains any [catalog properties](https://py.iceberg.apache.org/configuration/#rest-catalog) necessary to interact with your REST Catalog implementation. +You can additionally set the properties using environment variables + +```sh +export PYICEBERG_CATALOG__TEST_CATALOG__URI=thrift://localhost:9083 +export PYICEBERG_CATALOG__TEST_CATALOG__ACCESS_KEY_ID=username +export PYICEBERG_CATALOG__TEST_CATALOG__SECRET_ACCESS_KEY=password +``` ## Code standards diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 5c475564c6..64223686da 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-import json import os from pathlib import Path, PosixPath from typing import Generator, List @@ -77,17 +76,13 @@ def rest_catalog() -> Generator[Catalog, None, None]: @pytest.fixture(scope="function") -def rest_catalog_env() -> Generator[Catalog, None, None]: - if properties_json := os.environ.get("PYICEBERG_REST_CATALOG_PROPERTIES"): - properties = json.loads(properties_json) - test_catalog = RestCatalog( - "rest_env", - **properties, - ) +def test_catalog() -> Generator[Catalog, None, None]: + if test_catalog_name := os.environ.get("PYICEBERG_TEST_CATALOG"): + test_catalog = RestCatalog(test_catalog_name) yield test_catalog clean_up(test_catalog) else: - pytest.skip("REST catalog environment variables not set") + pytest.skip("Test catalog environment variables not set") @pytest.fixture(scope="function") @@ -111,7 +106,7 @@ def hive_catalog() -> Generator[Catalog, None, None]: pytest.lazy_fixture("sqlite_catalog_file"), pytest.lazy_fixture("rest_catalog"), pytest.lazy_fixture("hive_catalog"), - pytest.lazy_fixture("rest_catalog_env"), + pytest.lazy_fixture("test_catalog"), ] From 85dc168475114f61f990d656a20b2c298e0e1d26 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 24 Sep 2025 15:39:47 -0700 Subject: [PATCH 4/5] Add warning in docs --- mkdocs/docs/contributing.md | 13 ++++++++++--- tests/integration/test_catalog.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/mkdocs/docs/contributing.md b/mkdocs/docs/contributing.md index d2a2252ce3..b9e4e5c50c 100644 --- a/mkdocs/docs/contributing.md +++ b/mkdocs/docs/contributing.md @@ -157,9 +157,18 @@ To rebuild the containers from scratch. #### Running Integration Tests against REST Catalogs +!!! warning "Do not run against production catalogs" + The integration tests will delete data throughout the entirety of your catalog. Running these integration tests against production catalogs will result in data loss. 
+ PyIceberg supports the ability to run our catalog tests against an arbitrary REST Catalog. -You can set the test catalog in the ~/.pyiceberg.yaml file: +In order to run these tests, you will need to specify which REST catalog to run against with the `PYICEBERG_TEST_CATALOG` environment variable. + +```sh +export PYICEBERG_TEST_CATALOG=test_catalog +``` + +The catalog in question can be configured either through the ~/.pyiceberg.yaml file or through environment variables. ```yaml catalog: @@ -168,8 +177,6 @@ catalog: credential: t-1234:secret ``` -You can additionally set the properties using environment variables - ```sh export PYICEBERG_CATALOG__TEST_CATALOG__URI=thrift://localhost:9083 export PYICEBERG_CATALOG__TEST_CATALOG__ACCESS_KEY_ID=username export PYICEBERG_CATALOG__TEST_CATALOG__SECRET_ACCESS_KEY=password diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py index 64223686da..21f9436302 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -21,7 +21,7 @@ import pytest -from pyiceberg.catalog import Catalog, MetastoreCatalog +from pyiceberg.catalog import Catalog, MetastoreCatalog, load_catalog from pyiceberg.catalog.hive import HiveCatalog from pyiceberg.catalog.memory import InMemoryCatalog from pyiceberg.catalog.rest import RestCatalog @@ -78,7 +78,7 @@ def rest_catalog() -> Generator[Catalog, None, None]: @pytest.fixture(scope="function") def test_catalog() -> Generator[Catalog, None, None]: if test_catalog_name := os.environ.get("PYICEBERG_TEST_CATALOG"): - test_catalog = RestCatalog(test_catalog_name) + test_catalog = load_catalog(test_catalog_name) yield test_catalog clean_up(test_catalog) else: From 900f8953e8bf81c0ca80517365493d769f17d7b1 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 25 Sep 2025 15:00:36 -0700 Subject: [PATCH 5/5] Change fixture name to avoid issues --- tests/integration/test_catalog.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_catalog.py 
b/tests/integration/test_catalog.py index 21f9436302..b57ab983ba 100644 --- a/tests/integration/test_catalog.py +++ b/tests/integration/test_catalog.py @@ -76,13 +76,13 @@ def rest_catalog() -> Generator[Catalog, None, None]: @pytest.fixture(scope="function") -def test_catalog() -> Generator[Catalog, None, None]: +def rest_test_catalog() -> Generator[Catalog, None, None]: if test_catalog_name := os.environ.get("PYICEBERG_TEST_CATALOG"): test_catalog = load_catalog(test_catalog_name) yield test_catalog clean_up(test_catalog) else: - pytest.skip("Test catalog environment variables not set") + pytest.skip("PYICEBERG_TEST_CATALOG environment variables not set") @pytest.fixture(scope="function") @@ -106,7 +106,7 @@ def hive_catalog() -> Generator[Catalog, None, None]: pytest.lazy_fixture("sqlite_catalog_file"), pytest.lazy_fixture("rest_catalog"), pytest.lazy_fixture("hive_catalog"), - pytest.lazy_fixture("test_catalog"), + pytest.lazy_fixture("rest_test_catalog"), ]