Skip to content

Commit f459662

Browse files
Fix: SqlCatalog list_namespaces() should return only sub-namespaces (#1629)
Resolves: #1627
1 parent e3a5c3b commit f459662

File tree

3 files changed

+67
-24
lines changed

3 files changed

+67
-24
lines changed

pyiceberg/catalog/sql.py

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -619,15 +619,28 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi
619619
table_stmt = select(IcebergTables.table_namespace).where(IcebergTables.catalog_name == self.name)
620620
namespace_stmt = select(IcebergNamespaceProperties.namespace).where(IcebergNamespaceProperties.catalog_name == self.name)
621621
if namespace:
622-
namespace_str = Catalog.namespace_to_string(namespace, NoSuchNamespaceError)
623-
table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_str))
624-
namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_str))
622+
namespace_like = Catalog.namespace_to_string(namespace, NoSuchNamespaceError) + "%"
623+
table_stmt = table_stmt.where(IcebergTables.table_namespace.like(namespace_like))
624+
namespace_stmt = namespace_stmt.where(IcebergNamespaceProperties.namespace.like(namespace_like))
625625
stmt = union(
626626
table_stmt,
627627
namespace_stmt,
628628
)
629629
with Session(self.engine) as session:
630-
return [Catalog.identifier_to_tuple(namespace_col) for namespace_col in session.execute(stmt).scalars()]
630+
namespace_tuple = Catalog.identifier_to_tuple(namespace)
631+
sub_namespaces_level_length = len(namespace_tuple) + 1
632+
633+
namespaces = list(
634+
{ # only get distinct namespaces
635+
ns[:sub_namespaces_level_length] # truncate to the required level
636+
for ns in {Catalog.identifier_to_tuple(ns) for ns in session.execute(stmt).scalars()}
637+
if len(ns) >= sub_namespaces_level_length # only get sub namespaces/children
638+
and ns[: sub_namespaces_level_length - 1] == namespace_tuple
639+
# exclude fuzzy matches when `namespace` contains `%` or `_`
640+
}
641+
)
642+
643+
return namespaces
631644

632645
def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties:
633646
"""Get properties for a namespace.

tests/catalog/test_base.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -318,7 +318,7 @@ def test_rename_table(catalog: InMemoryCatalog) -> None:
318318
assert table._identifier == Catalog.identifier_to_tuple(new_table)
319319

320320
# And
321-
assert ("new", "namespace") in catalog.list_namespaces()
321+
assert catalog._namespace_exists(table._identifier[:-1])
322322

323323
# And
324324
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
@@ -342,7 +342,7 @@ def test_rename_table_from_self_identifier(catalog: InMemoryCatalog) -> None:
342342
assert new_table._identifier == Catalog.identifier_to_tuple(new_table_name)
343343

344344
# And
345-
assert ("new", "namespace") in catalog.list_namespaces()
345+
assert catalog._namespace_exists(new_table._identifier[:-1])
346346

347347
# And
348348
with pytest.raises(NoSuchTableError, match=NO_SUCH_TABLE_ERROR):
@@ -356,7 +356,7 @@ def test_create_namespace(catalog: InMemoryCatalog) -> None:
356356
catalog.create_namespace(TEST_TABLE_NAMESPACE, TEST_TABLE_PROPERTIES)
357357

358358
# Then
359-
assert TEST_TABLE_NAMESPACE in catalog.list_namespaces()
359+
assert catalog._namespace_exists(TEST_TABLE_NAMESPACE)
360360
assert TEST_TABLE_PROPERTIES == catalog.load_namespace_properties(TEST_TABLE_NAMESPACE)
361361

362362

@@ -379,7 +379,12 @@ def test_list_namespaces(catalog: InMemoryCatalog) -> None:
379379
# When
380380
namespaces = catalog.list_namespaces()
381381
# Then
382-
assert TEST_TABLE_NAMESPACE in namespaces
382+
assert TEST_TABLE_NAMESPACE[:1] in namespaces
383+
384+
# When
385+
namespaces = catalog.list_namespaces(TEST_TABLE_NAMESPACE)
386+
# Then
387+
assert not namespaces
383388

384389

385390
def test_drop_namespace(catalog: InMemoryCatalog) -> None:
@@ -388,7 +393,7 @@ def test_drop_namespace(catalog: InMemoryCatalog) -> None:
388393
# When
389394
catalog.drop_namespace(TEST_TABLE_NAMESPACE)
390395
# Then
391-
assert TEST_TABLE_NAMESPACE not in catalog.list_namespaces()
396+
assert not catalog._namespace_exists(TEST_TABLE_NAMESPACE)
392397

393398

394399
def test_drop_namespace_raises_error_when_namespace_does_not_exist(catalog: InMemoryCatalog) -> None:
@@ -437,7 +442,7 @@ def test_update_namespace_metadata(catalog: InMemoryCatalog) -> None:
437442
summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, updates=new_metadata)
438443

439444
# Then
440-
assert TEST_TABLE_NAMESPACE in catalog.list_namespaces()
445+
assert catalog._namespace_exists(TEST_TABLE_NAMESPACE)
441446
assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items()
442447
assert summary.removed == []
443448
assert sorted(summary.updated) == ["key3", "key4"]
@@ -454,7 +459,7 @@ def test_update_namespace_metadata_removals(catalog: InMemoryCatalog) -> None:
454459
summary = catalog.update_namespace_properties(TEST_TABLE_NAMESPACE, remove_metadata, new_metadata)
455460

456461
# Then
457-
assert TEST_TABLE_NAMESPACE in catalog.list_namespaces()
462+
assert catalog._namespace_exists(TEST_TABLE_NAMESPACE)
458463
assert new_metadata.items() <= catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).items()
459464
assert remove_metadata.isdisjoint(catalog.load_namespace_properties(TEST_TABLE_NAMESPACE).keys())
460465
assert summary.removed == ["key1"]

tests/catalog/test_sql.py

Lines changed: 38 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
import os
1919
from pathlib import Path
20-
from typing import Any, Generator, List, cast
20+
from typing import Any, Generator, cast
2121

2222
import pyarrow as pa
2323
import pytest
@@ -1027,7 +1027,7 @@ def test_create_namespace_if_not_exists(catalog: SqlCatalog, database_name: str)
10271027
@pytest.mark.parametrize("namespace", [lazy_fixture("database_name"), lazy_fixture("hierarchical_namespace_name")])
10281028
def test_create_namespace(catalog: SqlCatalog, namespace: str) -> None:
10291029
catalog.create_namespace(namespace)
1030-
assert (Catalog.identifier_to_tuple(namespace)) in catalog.list_namespaces()
1030+
assert (Catalog.identifier_to_tuple(namespace)[:1]) in catalog.list_namespaces()
10311031

10321032

10331033
@pytest.mark.parametrize(
@@ -1074,7 +1074,7 @@ def test_create_namespace_with_comment_and_location(catalog: SqlCatalog, namespa
10741074
}
10751075
catalog.create_namespace(namespace=namespace, properties=test_properties)
10761076
loaded_database_list = catalog.list_namespaces()
1077-
assert Catalog.identifier_to_tuple(namespace) in loaded_database_list
1077+
assert Catalog.identifier_to_tuple(namespace)[:1] in loaded_database_list
10781078
properties = catalog.load_namespace_properties(namespace)
10791079
assert properties["comment"] == "this is a test description"
10801080
assert properties["location"] == test_location
@@ -1135,17 +1135,42 @@ def test_namespace_exists(catalog: SqlCatalog) -> None:
11351135
lazy_fixture("catalog_sqlite"),
11361136
],
11371137
)
1138-
@pytest.mark.parametrize("namespace_list", [lazy_fixture("database_list"), lazy_fixture("hierarchical_namespace_list")])
1139-
def test_list_namespaces(catalog: SqlCatalog, namespace_list: List[str]) -> None:
1138+
def test_list_namespaces(catalog: SqlCatalog) -> None:
1139+
namespace_list = ["db", "db.ns1", "db.ns1.ns2", "db.ns2", "db2", "db2.ns1", "db%"]
11401140
for namespace in namespace_list:
1141-
catalog.create_namespace(namespace)
1142-
# Test global list
1141+
if not catalog._namespace_exists(namespace):
1142+
catalog.create_namespace(namespace)
1143+
11431144
ns_list = catalog.list_namespaces()
1145+
for ns in [("db",), ("db%",), ("db2",)]:
1146+
assert ns in ns_list
1147+
1148+
ns_list = catalog.list_namespaces("db")
1149+
assert sorted(ns_list) == [("db", "ns1"), ("db", "ns2")]
1150+
1151+
ns_list = catalog.list_namespaces("db.ns1")
1152+
assert sorted(ns_list) == [("db", "ns1", "ns2")]
1153+
1154+
ns_list = catalog.list_namespaces("db.ns1.ns2")
1155+
assert len(ns_list) == 0
1156+
1157+
1158+
@pytest.mark.parametrize(
1159+
"catalog",
1160+
[
1161+
lazy_fixture("catalog_memory"),
1162+
lazy_fixture("catalog_sqlite"),
1163+
],
1164+
)
1165+
def test_list_namespaces_fuzzy_match(catalog: SqlCatalog) -> None:
1166+
namespace_list = ["db.ns1", "db.ns1.ns2", "db.ns2", "db.ns1X.ns3", "db_.ns1.ns2", "db2.ns1.ns2"]
11441167
for namespace in namespace_list:
1145-
assert Catalog.identifier_to_tuple(namespace) in ns_list
1146-
# Test individual namespace list
1147-
assert len(one_namespace := catalog.list_namespaces(namespace)) == 1
1148-
assert Catalog.identifier_to_tuple(namespace) == one_namespace[0]
1168+
if not catalog._namespace_exists(namespace):
1169+
catalog.create_namespace(namespace)
1170+
1171+
assert catalog.list_namespaces("db.ns1") == [("db", "ns1", "ns2")]
1172+
1173+
assert catalog.list_namespaces("db_.ns1") == [("db_", "ns1", "ns2")]
11491174

11501175

11511176
@pytest.mark.parametrize(
@@ -1177,13 +1202,13 @@ def test_list_non_existing_namespaces(catalog: SqlCatalog) -> None:
11771202
def test_drop_namespace(catalog: SqlCatalog, table_schema_nested: Schema, table_identifier: Identifier) -> None:
11781203
namespace = Catalog.namespace_from(table_identifier)
11791204
catalog.create_namespace(namespace)
1180-
assert namespace in catalog.list_namespaces()
1205+
assert catalog._namespace_exists(namespace)
11811206
catalog.create_table(table_identifier, table_schema_nested)
11821207
with pytest.raises(NamespaceNotEmptyError):
11831208
catalog.drop_namespace(namespace)
11841209
catalog.drop_table(table_identifier)
11851210
catalog.drop_namespace(namespace)
1186-
assert namespace not in catalog.list_namespaces()
1211+
assert not catalog._namespace_exists(namespace)
11871212

11881213

11891214
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)