From b39988dca0d25140f38c73318ec97244632e8b82 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 19 Jun 2025 11:02:26 -0700 Subject: [PATCH 01/16] group the file io integration tests --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index cd6056b027..6cf29245b5 100644 --- a/Makefile +++ b/Makefile @@ -48,10 +48,6 @@ lint: ## lint test: ## Run all unit tests, can add arguments with PYTEST_ARGS="-vv" poetry run pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} -test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv" - sh ./dev/run-minio.sh - poetry run pytest tests/ -m s3 ${PYTEST_ARGS} - test-integration: | test-integration-setup test-integration-exec ## Run all integration tests, can add arguments with PYTEST_ARGS="-vv" test-integration-setup: # Prepare the environment for integration @@ -70,6 +66,10 @@ test-integration-rebuild: docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache +test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv" + sh ./dev/run-minio.sh + poetry run pytest tests/ -m s3 ${PYTEST_ARGS} + test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-azurite.sh poetry run pytest tests/ -m adls ${PYTEST_ARGS} From fd3cf2a871d1a56a13c6f143c0a1d929dc204320 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 19 Jun 2025 11:07:01 -0700 Subject: [PATCH 02/16] add to test-coverage --- Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6cf29245b5..224627614e 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-v test-gcs: ## Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-gcs-server.sh - poetry run pytest tests/ -m gcs ${PYTEST_ARGS} + poetry run pytest tests/ -m gcs ${PYTEST_ARGS} test-coverage-unit: # Run test with coverage for unit tests, can add arguments with PYTEST_ARGS="-vv" poetry run coverage run --source=pyiceberg/ --data-file=.coverage.unit -m pytest tests/ -v -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} @@ -85,14 +85,12 @@ test-coverage-integration: # Run test with coverage for integration tests, can a docker compose -f dev/docker-compose-integration.yml kill docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml up -d - sh ./dev/run-azurite.sh - sh ./dev/run-gcs-server.sh sleep 10 docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -v -m integration ${PYTEST_ARGS} -test-coverage: | test-coverage-unit test-coverage-integration ## Run all tests with coverage including unit and integration tests +test-coverage: | test-coverage-unit test-coverage-integration test-s3 test-adls test-gcs ## Run all tests with coverage including unit and integration tests poetry run coverage combine .coverage.unit .coverage.integration poetry run coverage report -m --fail-under=90 poetry run coverage html From ee98b42eda113ec3eb126f1a85cf2a04bdf60100 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 19 Jun 2025 11:17:34 -0700 Subject: [PATCH 03/16] refactor --- Makefile | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index 224627614e..686cfd0554 100644 --- a/Makefile +++ b/Makefile @@ -16,11 +16,11 @@ # under the License. -help: ## Display this help +help: # Display this help @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) POETRY_VERSION = 2.1.1 -install-poetry: ## Ensure Poetry is installed and the correct version is being used. +install-poetry: # Ensure Poetry is installed and the correct version is being used. @if ! command -v poetry &> /dev/null; then \ echo "Poetry could not be found. Installing..."; \ pip install --user poetry==$(POETRY_VERSION); \ @@ -34,21 +34,21 @@ install-poetry: ## Ensure Poetry is installed and the correct version is being fi \ fi -install-dependencies: ## Install dependencies including dev, docs, and all extras +install-dependencies: # Install dependencies including dev, docs, and all extras poetry install --all-extras install: | install-poetry install-dependencies -check-license: ## Check license headers +check-license: # Check license headers ./dev/check-license -lint: ## lint +lint: # lint poetry run pre-commit run --all-files -test: ## Run all unit tests, can add arguments with PYTEST_ARGS="-vv" +test: # Run all unit tests, can add arguments with PYTEST_ARGS="-vv" poetry run pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} -test-integration: | test-integration-setup test-integration-exec ## Run all integration tests, can add arguments with PYTEST_ARGS="-vv" +test-integration: | test-integration-setup test-integration-exec # Run all integration tests, can add arguments with PYTEST_ARGS="-vv" test-integration-setup: # Prepare the environment for integration docker compose -f dev/docker-compose-integration.yml kill @@ -58,7 +58,7 @@ test-integration-setup: # Prepare the environment for integration docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py -test-integration-exec: # Execute integration tests, can add arguments with PYTEST_ARGS="-vv" +test-integration-exec: # Execute integration tests, can add arguments with PYTEST_ARGS="-vv" poetry run pytest tests/ -v -m integration ${PYTEST_ARGS} test-integration-rebuild: @@ -70,34 +70,29 @@ test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-minio.sh poetry run pytest tests/ -m s3 ${PYTEST_ARGS} -test-adls: ## Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv" +test-adls: # Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-azurite.sh poetry run pytest tests/ -m adls ${PYTEST_ARGS} -test-gcs: ## Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" +test-gcs: # Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-gcs-server.sh poetry run pytest tests/ -m gcs ${PYTEST_ARGS} test-coverage-unit: # Run test with coverage for unit tests, can add arguments with PYTEST_ARGS="-vv" poetry run coverage run --source=pyiceberg/ --data-file=.coverage.unit -m pytest tests/ -v -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} -test-coverage-integration: # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv" - docker compose -f dev/docker-compose-integration.yml kill - docker compose -f dev/docker-compose-integration.yml rm -f - docker compose -f dev/docker-compose-integration.yml up -d - sleep 10 - docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py +test-coverage-integration: test-integration-setup # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv" docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -v -m integration ${PYTEST_ARGS} -test-coverage: | test-coverage-unit test-coverage-integration test-s3 test-adls test-gcs ## Run all tests with coverage including unit and integration tests +test-coverage: | test-coverage-unit test-coverage-integration test-s3 test-adls test-gcs # Run all tests with coverage including unit and integration tests poetry run coverage combine .coverage.unit .coverage.integration poetry run coverage report -m --fail-under=90 poetry run coverage html poetry run coverage xml -clean: ## Clean up the project Python working environment +clean: # Clean up the project Python working environment @echo "Cleaning up Cython and Python cached files" @rm -rf build dist *.egg-info @find . -name "*.so" -exec echo Deleting {} \; -delete From df51facbf33e213ccbbb36371a1bcd2f62f0835c Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 19 Jun 2025 11:26:08 -0700 Subject: [PATCH 04/16] refactor --- .github/workflows/python-ci.yml | 18 ++++++++++++++++++ Makefile | 19 +++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 0e0b0c0f62..f8a688d226 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -84,3 +84,21 @@ jobs: - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose.yml logs + + object-store-integration-test: + runs-on: ubuntu-22.04 + strategy: + matrix: + python: ['3.9', '3.10', '3.11', '3.12'] + + steps: + - uses: actions/checkout@v4 + - name: Install system dependencies + run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos + - name: Install + run: make install + - name: Run object store integration tests + run: make test-coverage-object-store-integration + - name: Show debug logs + if: ${{ failure() }} + run: docker compose -f dev/docker-compose.yml logs diff --git a/Makefile b/Makefile index 686cfd0554..7b3af0a3d7 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ install-poetry: # Ensure Poetry is installed and the correct version is being us install-dependencies: # Install dependencies including dev, docs, and all extras poetry install --all-extras -install: | install-poetry install-dependencies +install: | install-poetry install-dependencies # Install poetry and all dependencies check-license: # Check license headers ./dev/check-license @@ -66,6 +66,8 @@ test-integration-rebuild: docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache +test-object-store-integration: test-s3 test-adls test-gcs # Run object stores integration tests, can add arguments with PYTEST_ARGS="-vv" + test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-minio.sh poetry run pytest tests/ -m s3 ${PYTEST_ARGS} @@ -81,17 +83,26 @@ test-gcs: # Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" test-coverage-unit: # Run test with coverage for unit tests, can add arguments with PYTEST_ARGS="-vv" poetry run coverage run --source=pyiceberg/ --data-file=.coverage.unit -m pytest tests/ -v -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} -test-coverage-integration: test-integration-setup # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv" +test-coverage-integration: test-integration-setup test-object-store-integration # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv" docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -v -m integration ${PYTEST_ARGS} -test-coverage: | test-coverage-unit test-coverage-integration test-s3 test-adls test-gcs # Run all tests with coverage including unit and integration tests +test-coverage-object-store-integration: # Run test with coverage for object stores integration tests, can add arguments with PYTEST_ARGS="-vv" + sh ./dev/run-minio.sh + poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -m s3 ${PYTEST_ARGS} + + sh ./dev/run-azurite.sh + poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -m adls ${PYTEST_ARGS} + + sh ./dev/run-gcs-server.sh + poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -m gcs ${PYTEST_ARGS} + +test-coverage: | test-coverage-unit test-coverage-integration test-coverage-object-store-integration # Run all tests with coverage including unit and integration tests poetry run coverage combine .coverage.unit .coverage.integration poetry run coverage report -m --fail-under=90 poetry run coverage html poetry run coverage xml - clean: # Clean up the project Python working environment @echo "Cleaning up Cython and Python cached files" @rm -rf build dist *.egg-info From 90333e5dd111c0a1e2256ac0e00afcb638cb7498 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 19 Jun 2025 11:45:24 -0700 Subject: [PATCH 05/16] fix gcs test warnings --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4e479e9d0e..a86a0a7c43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -338,6 +338,10 @@ filterwarnings = [ "ignore:datetime.datetime.utcnow\\(\\) is deprecated and scheduled for removal in a future version.", # Latest PySpark version (v3.5.3) throws this error, remove in a future release of PySpark (possibly v4.0.0). "ignore:is_datetime64tz_dtype is deprecated and will be removed in a future version.", + # From fsspec gcsfs + "ignore:pkg_resources is deprecated as an API.", + "ignore:Deprecated call to `pkg_resources\\.declare_namespace\\('.*'\\):DeprecationWarning", + "ignore::DeprecationWarning:google.rpc", ] [tool.black] From cf061a2011c65462c05b38357d4beee5187d621d Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Thu, 19 Jun 2025 13:55:25 -0700 Subject: [PATCH 06/16] redundant provision --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 7b3af0a3d7..d27d7e1228 100644 --- a/Makefile +++ b/Makefile @@ -84,7 +84,6 @@ test-coverage-unit: # Run test with coverage for unit tests, can add arguments w poetry run coverage run --source=pyiceberg/ --data-file=.coverage.unit -m pytest tests/ -v -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} test-coverage-integration: test-integration-setup test-object-store-integration # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv" - docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -v -m integration ${PYTEST_ARGS} test-coverage-object-store-integration: # Run test with coverage for object stores integration tests, can add arguments with PYTEST_ARGS="-vv" From cc30ab3ddcea9fb36b3550a6e9183169c76d2112 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 07:52:47 -0700 Subject: [PATCH 07/16] revert pyproject.toml change --- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a86a0a7c43..4e479e9d0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -338,10 +338,6 @@ filterwarnings = [ "ignore:datetime.datetime.utcnow\\(\\) is deprecated and scheduled for removal in a future version.", # Latest PySpark version (v3.5.3) throws this error, remove in a future release of PySpark (possibly v4.0.0). "ignore:is_datetime64tz_dtype is deprecated and will be removed in a future version.", - # From fsspec gcsfs - "ignore:pkg_resources is deprecated as an API.", - "ignore:Deprecated call to `pkg_resources\\.declare_namespace\\('.*'\\):DeprecationWarning", - "ignore::DeprecationWarning:google.rpc", ] [tool.black] From 48a37efa8b75d75933cbe4a84308d51855b60545 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 07:59:36 -0700 Subject: [PATCH 08/16] run file io tests individually --- .github/workflows/python-ci.yml | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index f8a688d226..4d7505e6f6 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -79,26 +79,27 @@ jobs: run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos - name: Install run: make install + - name: Run integration tests run: make test-coverage-integration - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose.yml logs - object-store-integration-test: - runs-on: ubuntu-22.04 - strategy: - matrix: - python: ['3.9', '3.10', '3.11', '3.12'] - - steps: - - uses: actions/checkout@v4 - - name: Install system dependencies - run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos - - name: Install - run: make install - - name: Run object store integration tests - run: make test-coverage-object-store-integration + - name: Run s3 integration tests + run: make test-s3 - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose.yml logs + + - name: Run adls integration tests + run: make test-adls + - name: Show debug logs + if: ${{ failure() }} + run: docker compose -f dev/docker-compose-azurite.yml logs + + - name: Run gcs integration tests + run: make test-gcs + - name: Show debug logs + if: ${{ failure() }} + run: docker compose -f dev/docker-compose-gcs-server.yml logs From 53c9bd95fe0994b5ab38c046c672c63e56c80ef9 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 08:42:15 -0700 Subject: [PATCH 09/16] parameterize coverage --- Makefile | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index d27d7e1228..290c6ad292 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,13 @@ # specific language governing permissions and limitations # under the License. +PYTEST_ARGS ?= -v +COVERAGE ?= 0 +ifeq ($(COVERAGE),1) + TEST_RUNNER = poetry run coverage run --parallel-mode --source=pyiceberg -m +else + TEST_RUNNER = poetry run +endif help: # Display this help @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) @@ -42,11 +49,11 @@ install: | install-poetry install-dependencies # Install poetry and all dependen check-license: # Check license headers ./dev/check-license -lint: # lint +lint: # Run linters poetry run pre-commit run --all-files test: # Run all unit tests, can add arguments with PYTEST_ARGS="-vv" - poetry run pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} test-integration: | test-integration-setup test-integration-exec # Run all integration tests, can add arguments with PYTEST_ARGS="-vv" @@ -59,45 +66,28 @@ test-integration-setup: # Prepare the environment for integration docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py test-integration-exec: # Execute integration tests, can add arguments with PYTEST_ARGS="-vv" - poetry run pytest tests/ -v -m integration ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -v -m integration ${PYTEST_ARGS} test-integration-rebuild: docker compose -f dev/docker-compose-integration.yml kill docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache -test-object-store-integration: test-s3 test-adls test-gcs # Run object stores integration tests, can add arguments with PYTEST_ARGS="-vv" - test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-minio.sh - poetry run pytest tests/ -m s3 ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -m s3 ${PYTEST_ARGS} test-adls: # Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-azurite.sh - poetry run pytest tests/ -m adls ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -m adls ${PYTEST_ARGS} test-gcs: # Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" sh ./dev/run-gcs-server.sh - poetry run pytest tests/ -m gcs ${PYTEST_ARGS} - -test-coverage-unit: # Run test with coverage for unit tests, can add arguments with PYTEST_ARGS="-vv" - poetry run coverage run --source=pyiceberg/ --data-file=.coverage.unit -m pytest tests/ -v -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} - -test-coverage-integration: test-integration-setup test-object-store-integration # Run test with coverage for integration tests, can add arguments with PYTEST_ARGS="-vv" - poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -v -m integration ${PYTEST_ARGS} - -test-coverage-object-store-integration: # Run test with coverage for object stores integration tests, can add arguments with PYTEST_ARGS="-vv" - sh ./dev/run-minio.sh - poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -m s3 ${PYTEST_ARGS} - - sh ./dev/run-azurite.sh - poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -m adls ${PYTEST_ARGS} - - sh ./dev/run-gcs-server.sh - poetry run coverage run --source=pyiceberg/ --data-file=.coverage.integration -m pytest tests/ -m gcs ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -m gcs ${PYTEST_ARGS} -test-coverage: | test-coverage-unit test-coverage-integration test-coverage-object-store-integration # Run all tests with coverage including unit and integration tests - poetry run coverage combine .coverage.unit .coverage.integration +test-coverage: COVERAGE=1 +test-coverage: test test-integration test-s3 test-adls test-gcs ## Run all tests with coverage + poetry run coverage combine poetry run coverage report -m --fail-under=90 poetry run coverage html poetry run coverage xml From 98d5b6626ef885b939509e999d80c9f0841a59ef Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 08:42:21 -0700 Subject: [PATCH 10/16] add comments --- Makefile | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 290c6ad292..15573ae8e5 100644 --- a/Makefile +++ b/Makefile @@ -15,8 +15,18 @@ # specific language governing permissions and limitations # under the License. +# Optional arguments passed to pytest. Can be overridden at runtime: +# make test PYTEST_ARGS="-vv --tb=short" PYTEST_ARGS ?= -v + +# Toggle coverage collection: +# make test COVERAGE=1 -> runs tests with coverage +# make test -> runs tests without coverage COVERAGE ?= 0 + +# Selects the test runner command based on the COVERAGE flag. +# If COVERAGE=1, uses coverage to track test execution with --parallel-mode. +# Otherwise, runs tests normally using poetry and pytest. ifeq ($(COVERAGE),1) TEST_RUNNER = poetry run coverage run --parallel-mode --source=pyiceberg -m else @@ -52,10 +62,11 @@ check-license: # Check license headers lint: # Run linters poetry run pre-commit run --all-files -test: # Run all unit tests, can add arguments with PYTEST_ARGS="-vv" +test: # Run all unit tests $(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} -test-integration: | test-integration-setup test-integration-exec # Run all integration tests, can add arguments with PYTEST_ARGS="-vv" +test-integration: | test-integration-setup test-integration-exec # Run integration tests, excludes s3, adls, and gcs tests + $(TEST_RUNNER) pytest tests/ -m integration ${PYTEST_ARGS} test-integration-setup: # Prepare the environment for integration docker compose -f dev/docker-compose-integration.yml kill @@ -65,7 +76,7 @@ test-integration-setup: # Prepare the environment for integration docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py -test-integration-exec: # Execute integration tests, can add arguments with PYTEST_ARGS="-vv" +test-integration-exec: # Execute integration tests $(TEST_RUNNER) pytest tests/ -v -m integration ${PYTEST_ARGS} test-integration-rebuild: @@ -73,15 +84,15 @@ test-integration-rebuild: docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache -test-s3: # Run tests marked with s3, can add arguments with PYTEST_ARGS="-vv" +test-s3: # Run tests marked with s3 sh ./dev/run-minio.sh $(TEST_RUNNER) pytest tests/ -m s3 ${PYTEST_ARGS} -test-adls: # Run tests marked with adls, can add arguments with PYTEST_ARGS="-vv" +test-adls: # Run tests marked with adls sh ./dev/run-azurite.sh $(TEST_RUNNER) pytest tests/ -m adls ${PYTEST_ARGS} -test-gcs: # Run tests marked with gcs, can add arguments with PYTEST_ARGS="-vv" +test-gcs: # Run tests marked with gcs sh ./dev/run-gcs-server.sh $(TEST_RUNNER) pytest tests/ -m gcs ${PYTEST_ARGS} From 074c0e84616f578616f6e334cc230c4a7fc6c0e1 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 09:02:31 -0700 Subject: [PATCH 11/16] refactor makefile --- Makefile | 129 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 80 insertions(+), 49 deletions(-) diff --git a/Makefile b/Makefile index 15573ae8e5..4d5a8d94f3 100644 --- a/Makefile +++ b/Makefile @@ -14,61 +14,80 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# ======================== +# Configuration Variables +# ======================== -# Optional arguments passed to pytest. Can be overridden at runtime: -# make test PYTEST_ARGS="-vv --tb=short" -PYTEST_ARGS ?= -v +PYTEST_ARGS ?= -v # Override with e.g. PYTEST_ARGS="-vv --tb=short" +COVERAGE ?= 0 # Set COVERAGE=1 to enable coverage: make test COVERAGE=1 -# Toggle coverage collection: -# make test COVERAGE=1 -> runs tests with coverage -# make test -> runs tests without coverage -COVERAGE ?= 0 - -# Selects the test runner command based on the COVERAGE flag. -# If COVERAGE=1, uses coverage to track test execution with --parallel-mode. -# Otherwise, runs tests normally using poetry and pytest. ifeq ($(COVERAGE),1) TEST_RUNNER = poetry run coverage run --parallel-mode --source=pyiceberg -m else TEST_RUNNER = poetry run endif -help: # Display this help - @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) - POETRY_VERSION = 2.1.1 -install-poetry: # Ensure Poetry is installed and the correct version is being used. + +# ============ +# Help Section +# ============ + +##@ General + +help: ## Display this help message + @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-25s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) + +# ================== +# Installation Tasks +# ================== + +##@ Setup + +install-poetry: ## Ensure Poetry is installed at the specified version @if ! command -v poetry &> /dev/null; then \ - echo "Poetry could not be found. Installing..."; \ + echo "Poetry not found. Installing..."; \ pip install --user poetry==$(POETRY_VERSION); \ else \ INSTALLED_VERSION=$$(pip show poetry | grep Version | awk '{print $$2}'); \ if [ "$$INSTALLED_VERSION" != "$(POETRY_VERSION)" ]; then \ - echo "Poetry version $$INSTALLED_VERSION does not match required version $(POETRY_VERSION). Updating..."; \ + echo "Updating Poetry to version $(POETRY_VERSION)..."; \ pip install --user --upgrade poetry==$(POETRY_VERSION); \ else \ - echo "Poetry version $$INSTALLED_VERSION is already installed."; \ - fi \ + echo "Poetry version $(POETRY_VERSION) already installed."; \ + fi; \ fi -install-dependencies: # Install dependencies including dev, docs, and all extras +install-dependencies: ## Install all dependencies including extras poetry install --all-extras -install: | install-poetry install-dependencies # Install poetry and all dependencies +install: install-poetry install-dependencies ## Install Poetry and dependencies + +# =============== +# Code Validation +# =============== -check-license: # Check license headers +##@ Quality + +check-license: ## Check license headers ./dev/check-license -lint: # Run linters +lint: ## Run code linters via pre-commit poetry run pre-commit run --all-files -test: # Run all unit tests - $(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" ${PYTEST_ARGS} +# =============== +# Testing Section +# =============== + +##@ Testing -test-integration: | test-integration-setup test-integration-exec # Run integration tests, excludes s3, adls, and gcs tests - $(TEST_RUNNER) pytest tests/ -m integration ${PYTEST_ARGS} +test: ## Run all unit tests (excluding integration) + $(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" $(PYTEST_ARGS) -test-integration-setup: # Prepare the environment for integration +test-integration: test-integration-setup test-integration-exec ## Run integration tests + $(TEST_RUNNER) pytest tests/ -m integration $(PYTEST_ARGS) + +test-integration-setup: ## Start Docker services for integration tests docker compose -f dev/docker-compose-integration.yml kill docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml up -d @@ -76,25 +95,25 @@ test-integration-setup: # Prepare the environment for integration docker compose -f dev/docker-compose-integration.yml cp ./dev/provision.py spark-iceberg:/opt/spark/provision.py docker compose -f dev/docker-compose-integration.yml exec -T spark-iceberg ipython ./provision.py -test-integration-exec: # Execute integration tests - $(TEST_RUNNER) pytest tests/ -v -m integration ${PYTEST_ARGS} +test-integration-exec: ## Run integration tests (excluding provision) + $(TEST_RUNNER) pytest tests/ -m integration $(PYTEST_ARGS) -test-integration-rebuild: +test-integration-rebuild: ## Rebuild integration Docker services from scratch docker compose -f dev/docker-compose-integration.yml kill docker compose -f dev/docker-compose-integration.yml rm -f docker compose -f dev/docker-compose-integration.yml build --no-cache -test-s3: # Run tests marked with s3 +test-s3: ## Run tests marked with @pytest.mark.s3 sh ./dev/run-minio.sh - $(TEST_RUNNER) pytest tests/ -m s3 ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -m s3 $(PYTEST_ARGS) -test-adls: # Run tests marked with adls +test-adls: ## Run tests marked with @pytest.mark.adls sh ./dev/run-azurite.sh - $(TEST_RUNNER) pytest tests/ -m adls ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -m adls $(PYTEST_ARGS) -test-gcs: # Run tests marked with gcs +test-gcs: ## Run tests marked with @pytest.mark.gcs sh ./dev/run-gcs-server.sh - $(TEST_RUNNER) pytest tests/ -m gcs ${PYTEST_ARGS} + $(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS) test-coverage: COVERAGE=1 test-coverage: test test-integration test-s3 test-adls test-gcs ## Run all tests with coverage @@ -103,21 +122,33 @@ test-coverage: test test-integration test-s3 test-adls test-gcs ## Run all tests poetry run coverage html poetry run coverage xml -clean: # Clean up the project Python working environment - @echo "Cleaning up Cython and Python cached files" - @rm -rf build dist *.egg-info - @find . -name "*.so" -exec echo Deleting {} \; -delete - @find . -name "*.pyc" -exec echo Deleting {} \; -delete - @find . -name "__pycache__" -exec echo Deleting {} \; -exec rm -rf {} + - @find . -name "*.pyd" -exec echo Deleting {} \; -delete - @find . -name "*.pyo" -exec echo Deleting {} \; -delete - @echo "Cleanup complete" +# ================ +# Documentation +# ================ -docs-install: +##@ Documentation + +docs-install: ## Install docs dependencies poetry install --with docs -docs-serve: +docs-serve: ## Serve local docs preview (hot reload) poetry run mkdocs serve -f mkdocs/mkdocs.yml -docs-build: +docs-build: ## Build the static documentation site poetry run mkdocs build -f mkdocs/mkdocs.yml --strict + +# =================== +# Project Maintenance +# =================== + +##@ Maintenance + +clean: ## Remove build artifacts and caches + @echo "Cleaning up Cython and Python cached files..." + @rm -rf build dist *.egg-info + @find . -name "*.so" -exec echo Deleting {} \; -delete + @find . -name "*.pyc" -exec echo Deleting {} \; -delete + @find . -name "__pycache__" -exec echo Deleting {} \; -exec rm -rf {} + + @find . -name "*.pyd" -exec echo Deleting {} \; -delete + @find . -name "*.pyo" -exec echo Deleting {} \; -delete + @echo "Cleanup complete." From 16b6024cdc3e19f76765c2cd89df3a4a453dbb19 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 09:02:43 -0700 Subject: [PATCH 12/16] run ci without coverage --- .github/workflows/python-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 4d7505e6f6..773493aca0 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -62,10 +62,10 @@ jobs: run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos - name: Install run: make install-dependencies - - name: Linters + - name: Run linters run: make lint - - name: Tests - run: make test-coverage-unit + - name: Run unit tests + run: make test integration-test: runs-on: ubuntu-22.04 @@ -81,7 +81,7 @@ jobs: run: make install - name: Run integration tests - run: make test-coverage-integration + run: make test-integration - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose.yml logs From 618822cbbf059ceacc446c5336c64d117a3a60b4 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 09:13:08 -0700 Subject: [PATCH 13/16] ci run with coverage report --- .github/workflows/python-ci.yml | 25 +++++++++++++++---------- Makefile | 4 +++- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index 773493aca0..a4ce529237 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -64,8 +64,10 @@ jobs: run: make install-dependencies - name: Run linters run: make lint - - name: Run unit tests - run: make test + - name: Run unit tests with coverage + run: COVERAGE=1 make test + - name: Generate coverage report + run: make coverage-report integration-test: runs-on: ubuntu-22.04 @@ -80,26 +82,29 @@ jobs: - name: Install run: make install - - name: Run integration tests - run: make test-integration + - name: Run integration tests with coverage + run: COVERAGE=1 make test-integration - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose.yml logs - - name: Run s3 integration tests - run: make test-s3 + - name: Run s3 integration tests with coverage + run: COVERAGE=1 make test-s3 - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose.yml logs - - name: Run adls integration tests - run: make test-adls + - name: Run adls integration tests with coverage + run: COVERAGE=1 make test-adls - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose-azurite.yml logs - - name: Run gcs integration tests - run: make test-gcs + - name: Run gcs integration tests with coverage + run: COVERAGE=1 make test-gcs - name: Show debug logs if: ${{ failure() }} run: docker compose -f dev/docker-compose-gcs-server.yml logs + + - name: Generate coverage report + run: make coverage-report diff --git a/Makefile b/Makefile index 4d5a8d94f3..5560820d71 100644 --- a/Makefile +++ b/Makefile @@ -116,7 +116,9 @@ test-gcs: ## Run tests marked with @pytest.mark.gcs $(TEST_RUNNER) pytest tests/ -m gcs $(PYTEST_ARGS) test-coverage: COVERAGE=1 -test-coverage: test test-integration test-s3 test-adls test-gcs ## Run all tests with coverage +test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report ## Run all tests with coverage and report + +coverage-report: ## Combine and report coverage poetry run coverage combine poetry run coverage report -m --fail-under=90 poetry run coverage html From a4c366b8a3d6797ceac7d9cf6363052733005925 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 09:15:41 -0700 Subject: [PATCH 14/16] dont run integration tests twice! --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 5560820d71..ad8eeb53ec 100644 --- a/Makefile +++ b/Makefile @@ -85,7 +85,6 @@ test: ## Run all unit tests (excluding integration) $(TEST_RUNNER) pytest tests/ -m "(unmarked or parametrize) and not integration" $(PYTEST_ARGS) test-integration: test-integration-setup test-integration-exec ## Run integration tests - $(TEST_RUNNER) pytest tests/ -m integration $(PYTEST_ARGS) test-integration-setup: ## Start Docker services for integration tests docker compose -f dev/docker-compose-integration.yml kill From 1e009e9d27c9b649b79132d701d68e7366345bbf Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 09:24:33 -0700 Subject: [PATCH 15/16] lower coverage threashold to 85 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ad8eeb53ec..7872313c3f 100644 --- a/Makefile +++ b/Makefile @@ -119,7 +119,7 @@ test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report coverage-report: ## Combine and report coverage poetry run coverage combine - poetry run coverage report -m --fail-under=90 + poetry run coverage report -m --fail-under=85 poetry run coverage html poetry run coverage xml From 5e644368e4cf07c2be265892689bcf46b9c54176 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Fri, 20 Jun 2025 12:22:09 -0700 Subject: [PATCH 16/16] coverage % configurable --- .github/workflows/python-ci.yml | 8 ++++---- Makefile | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml index a4ce529237..c8cdbcc58a 100644 --- a/.github/workflows/python-ci.yml +++ b/.github/workflows/python-ci.yml @@ -66,8 +66,8 @@ jobs: run: make lint - name: Run unit tests with coverage run: COVERAGE=1 make test - - name: Generate coverage report - run: make coverage-report + - name: Generate coverage report (85%) # Coverage threshold should only increase over time — never decrease it! + run: COVERAGE_FAIL_UNDER=85 make coverage-report integration-test: runs-on: ubuntu-22.04 @@ -106,5 +106,5 @@ jobs: if: ${{ failure() }} run: docker compose -f dev/docker-compose-gcs-server.yml logs - - name: Generate coverage report - run: make coverage-report + - name: Generate coverage report (75%) # Coverage threshold should only increase over time — never decrease it! + run: COVERAGE_FAIL_UNDER=75 make coverage-report diff --git a/Makefile b/Makefile index 7872313c3f..859d8dfa23 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,7 @@ PYTEST_ARGS ?= -v # Override with e.g. PYTEST_ARGS="-vv --tb=short" COVERAGE ?= 0 # Set COVERAGE=1 to enable coverage: make test COVERAGE=1 +COVERAGE_FAIL_UNDER ?= 85 # Minimum coverage % to pass: make coverage-report COVERAGE_FAIL_UNDER=70 ifeq ($(COVERAGE),1) TEST_RUNNER = poetry run coverage run --parallel-mode --source=pyiceberg -m @@ -119,7 +120,7 @@ test-coverage: test test-integration test-s3 test-adls test-gcs coverage-report coverage-report: ## Combine and report coverage poetry run coverage combine - poetry run coverage report -m --fail-under=85 + poetry run coverage report -m --fail-under=$(COVERAGE_FAIL_UNDER) poetry run coverage html poetry run coverage xml