Skip to content

Commit 52a3643

Browse files
committed
Merge branch 'main' into auth-manager-oauth
2 parents 613ca53 + c84017d commit 52a3643

File tree

12 files changed

+213
-219
lines changed

12 files changed

+213
-219
lines changed

.github/workflows/python-ci.yml

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,4 +65,22 @@ jobs:
6565
- name: Linters
6666
run: make lint
6767
- name: Tests
68-
run: make test-coverage
68+
run: make test-coverage-unit
69+
70+
integration-test:
71+
runs-on: ubuntu-22.04
72+
strategy:
73+
matrix:
74+
python: ['3.9', '3.10', '3.11', '3.12']
75+
76+
steps:
77+
- uses: actions/checkout@v4
78+
- name: Install system dependencies
79+
run: sudo apt-get update && sudo apt-get install -y libkrb5-dev # for kerberos
80+
- name: Install
81+
run: make install
82+
- name: Run integration tests
83+
run: make test-coverage-integration
84+
- name: Show debug logs
85+
if: ${{ failure() }}
86+
run: docker compose -f dev/docker-compose.yml logs

.github/workflows/python-integration.yml

Lines changed: 0 additions & 61 deletions
This file was deleted.

poetry.lock

Lines changed: 133 additions & 133 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyiceberg/catalog/hive.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
from hive_metastore.ttypes import (
3737
AlreadyExistsException,
3838
CheckLockRequest,
39+
EnvironmentContext,
3940
FieldSchema,
4041
InvalidOperationException,
4142
LockComponent,
@@ -135,6 +136,8 @@
135136
DEFAULT_LOCK_CHECK_MIN_WAIT_TIME = 0.1 # 100 milliseconds
136137
DEFAULT_LOCK_CHECK_MAX_WAIT_TIME = 60 # 1 min
137138
DEFAULT_LOCK_CHECK_RETRIES = 4
139+
DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS"
140+
DO_NOT_UPDATE_STATS_DEFAULT = "true"
138141

139142
logger = logging.getLogger(__name__)
140143

@@ -539,7 +542,12 @@ def commit_table(
539542
metadata_location=updated_staged_table.metadata_location,
540543
previous_metadata_location=current_table.metadata_location,
541544
)
542-
open_client.alter_table(dbname=database_name, tbl_name=table_name, new_tbl=hive_table)
545+
open_client.alter_table_with_environment_context(
546+
dbname=database_name,
547+
tbl_name=table_name,
548+
new_tbl=hive_table,
549+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
550+
)
543551
else:
544552
# Table does not exist, create it.
545553
hive_table = self._convert_iceberg_into_hive(
@@ -626,7 +634,12 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
626634
tbl = open_client.get_table(dbname=from_database_name, tbl_name=from_table_name)
627635
tbl.dbName = to_database_name
628636
tbl.tableName = to_table_name
629-
open_client.alter_table(dbname=from_database_name, tbl_name=from_table_name, new_tbl=tbl)
637+
open_client.alter_table_with_environment_context(
638+
dbname=from_database_name,
639+
tbl_name=from_table_name,
640+
new_tbl=tbl,
641+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
642+
)
630643
except NoSuchObjectException as e:
631644
raise NoSuchTableError(f"Table does not exist: {from_table_name}") from e
632645
except InvalidOperationException as e:

pyiceberg/catalog/rest/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -725,7 +725,7 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi
725725
try:
726726
response.raise_for_status()
727727
except HTTPError as exc:
728-
_handle_non_200_response(exc, {})
728+
_handle_non_200_response(exc, {404: NoSuchNamespaceError})
729729

730730
return ListNamespaceResponse.model_validate_json(response.text).namespaces
731731

pyiceberg/table/metadata.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -549,13 +549,6 @@ def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
549549
"""The table’s highest assigned sequence number, a monotonically
550550
increasing long that tracks the order of snapshots in a table."""
551551

552-
row_lineage: bool = Field(alias="row-lineage", default=False)
553-
"""Indicates that row-lineage is enabled on the table
554-
555-
For more information:
556-
https://iceberg.apache.org/spec/?column-projection#row-lineage
557-
"""
558-
559552
next_row_id: Optional[int] = Field(alias="next-row-id", default=None)
560553
"""A long higher than all assigned row IDs; the next snapshot's `first-row-id`."""
561554

pyiceberg/table/snapshots.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -438,7 +438,7 @@ def ancestors_of(current_snapshot: Optional[Snapshot], table_metadata: TableMeta
438438

439439

440440
def ancestors_between(
441-
to_snapshot: Snapshot, from_snapshot: Optional[Snapshot], table_metadata: TableMetadata
441+
from_snapshot: Optional[Snapshot], to_snapshot: Snapshot, table_metadata: TableMetadata
442442
) -> Iterable[Snapshot]:
443443
"""Get the ancestors of and including the given snapshot between the to and from snapshots."""
444444
if from_snapshot is not None:

pyiceberg/table/update/validate.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,17 +23,17 @@
2323

2424
def validation_history(
2525
table: Table,
26-
to_snapshot: Snapshot,
2726
from_snapshot: Snapshot,
27+
to_snapshot: Snapshot,
2828
matching_operations: set[Operation],
2929
manifest_content_filter: ManifestContent,
3030
) -> tuple[list[ManifestFile], set[int]]:
3131
"""Return newly added manifests and snapshot IDs between the starting snapshot and parent snapshot.
3232
3333
Args:
3434
table: Table to get the history from
35-
to_snapshot: Starting snapshot
3635
from_snapshot: Parent snapshot to get the history from
36+
to_snapshot: Starting snapshot
3737
matching_operations: Operations to match on
3838
manifest_content_filter: Manifest content type to filter
3939
@@ -47,7 +47,7 @@ def validation_history(
4747
snapshots: set[int] = set()
4848

4949
last_snapshot = None
50-
for snapshot in ancestors_between(to_snapshot, from_snapshot, table.metadata):
50+
for snapshot in ancestors_between(from_snapshot, to_snapshot, table.metadata):
5151
last_snapshot = snapshot
5252
summary = snapshot.summary
5353
if summary is None:

tests/catalog/test_hive.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import thrift.transport.TSocket
3030
from hive_metastore.ttypes import (
3131
AlreadyExistsException,
32+
EnvironmentContext,
3233
FieldSchema,
3334
InvalidOperationException,
3435
LockResponse,
@@ -44,6 +45,8 @@
4445

4546
from pyiceberg.catalog import PropertiesUpdateSummary
4647
from pyiceberg.catalog.hive import (
48+
DO_NOT_UPDATE_STATS,
49+
DO_NOT_UPDATE_STATS_DEFAULT,
4750
HIVE_KERBEROS_AUTH,
4851
LOCK_CHECK_MAX_WAIT_TIME,
4952
LOCK_CHECK_MIN_WAIT_TIME,
@@ -874,7 +877,7 @@ def test_rename_table(hive_table: HiveTable) -> None:
874877

875878
catalog._client = MagicMock()
876879
catalog._client.__enter__().get_table.side_effect = [hive_table, renamed_table]
877-
catalog._client.__enter__().alter_table.return_value = None
880+
catalog._client.__enter__().alter_table_with_environment_context.return_value = None
878881

879882
from_identifier = ("default", "new_tabl2e")
880883
to_identifier = ("default", "new_tabl3e")
@@ -884,7 +887,12 @@ def test_rename_table(hive_table: HiveTable) -> None:
884887

885888
calls = [call(dbname="default", tbl_name="new_tabl2e"), call(dbname="default", tbl_name="new_tabl3e")]
886889
catalog._client.__enter__().get_table.assert_has_calls(calls)
887-
catalog._client.__enter__().alter_table.assert_called_with(dbname="default", tbl_name="new_tabl2e", new_tbl=renamed_table)
890+
catalog._client.__enter__().alter_table_with_environment_context.assert_called_with(
891+
dbname="default",
892+
tbl_name="new_tabl2e",
893+
new_tbl=renamed_table,
894+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
895+
)
888896

889897

890898
def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
@@ -902,22 +910,27 @@ def test_rename_table_from_self_identifier(hive_table: HiveTable) -> None:
902910
renamed_table.tableName = "new_tabl3e"
903911

904912
catalog._client.__enter__().get_table.side_effect = [hive_table, renamed_table]
905-
catalog._client.__enter__().alter_table.return_value = None
913+
catalog._client.__enter__().alter_table_with_environment_context.return_value = None
906914
to_identifier = ("default", "new_tabl3e")
907915
table = catalog.rename_table(from_table.name(), to_identifier)
908916

909917
assert table.name() == to_identifier
910918

911919
calls = [call(dbname="default", tbl_name="new_tabl2e"), call(dbname="default", tbl_name="new_tabl3e")]
912920
catalog._client.__enter__().get_table.assert_has_calls(calls)
913-
catalog._client.__enter__().alter_table.assert_called_with(dbname="default", tbl_name="new_tabl2e", new_tbl=renamed_table)
921+
catalog._client.__enter__().alter_table_with_environment_context.assert_called_with(
922+
dbname="default",
923+
tbl_name="new_tabl2e",
924+
new_tbl=renamed_table,
925+
environment_context=EnvironmentContext(properties={DO_NOT_UPDATE_STATS: DO_NOT_UPDATE_STATS_DEFAULT}),
926+
)
914927

915928

916929
def test_rename_table_from_does_not_exists() -> None:
917930
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
918931

919932
catalog._client = MagicMock()
920-
catalog._client.__enter__().alter_table.side_effect = NoSuchObjectException(
933+
catalog._client.__enter__().alter_table_with_environment_context.side_effect = NoSuchObjectException(
921934
message="hive.default.does_not_exists table not found"
922935
)
923936

@@ -931,7 +944,7 @@ def test_rename_table_to_namespace_does_not_exists() -> None:
931944
catalog = HiveCatalog(HIVE_CATALOG_NAME, uri=HIVE_METASTORE_FAKE_URL)
932945

933946
catalog._client = MagicMock()
934-
catalog._client.__enter__().alter_table.side_effect = InvalidOperationException(
947+
catalog._client.__enter__().alter_table_with_environment_context.side_effect = InvalidOperationException(
935948
message="Unable to change partition or table. Database default does not exist Check metastore logs for detailed stack.does_not_exists"
936949
)
937950

tests/catalog/test_rest.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -555,6 +555,24 @@ def test_list_namespace_with_parent_200(rest_mock: Mocker) -> None:
555555
]
556556

557557

558+
def test_list_namespace_with_parent_404(rest_mock: Mocker) -> None:
559+
rest_mock.get(
560+
f"{TEST_URI}v1/namespaces?parent=some_namespace",
561+
json={
562+
"error": {
563+
"message": "Namespace provided in the `parent` query parameter is not found",
564+
"type": "NoSuchNamespaceException",
565+
"code": 404,
566+
}
567+
},
568+
status_code=404,
569+
request_headers=TEST_HEADERS,
570+
)
571+
572+
with pytest.raises(NoSuchNamespaceError):
573+
RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_namespaces(("some_namespace",))
574+
575+
558576
@pytest.mark.filterwarnings(
559577
"ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client is missing the OAuth2 server URI:DeprecationWarning"
560578
)

0 commit comments

Comments
 (0)