From 730f0f2a678e30c033345a6ffc7915b168f7c21e Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 26 Jun 2025 11:03:36 -0700 Subject: [PATCH 01/18] Added view metadata. --- pyiceberg/table/metadata.py | 63 +++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index 9c2ae29cdd..b65639533c 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -684,3 +684,66 @@ def _construct_without_validation(table_metadata: TableMetadata) -> TableMetadat return TableMetadataV3.model_construct(**dict(table_metadata)) else: raise ValidationError(f"Unknown format version: {table_metadata.format_version}") + +class ViewRepresentation(IcebergBaseModel): + """Represents the SQL query that defines the view.""" + + type: Literal["sql"] = Field() + """A string that indicates the type of representation. Must be `sql`""" + sql: str = Field() + """A string that contains the SQL text of the view definition.""" + dialect: str = Field() + """The dialect of the SQL, e.g. `spark`, `trino`, `presto`.""" + + +class ViewVersion(IcebergBaseModel): + """A version of the view definition.""" + + version_id: int = Field(alias="version-id") + """ID for the version""" + schema_id: int = Field(alias="schema-id") + """ID of the schema for the view version""" + timestamp_ms: int = Field(alias="timestamp-ms") + """Timestamp when the version was created (ms from epoch)""" + summary: Dict[str, str] = Field() + """A string to string map of summary metadata about the version""" + representations: List[ViewRepresentation] = Field() + """A list of representations for the view definition""" + default_catalog: Optional[str] = Field(alias="default-catalog", default=None) + """Catalog name to use when a reference in the SELECT does not contain a catalog""" + default_namespace: List[str] = Field(alias="default-namespace") + """Namespace to use when a reference in the SELECT is a single identifier""" + + +class ViewVersionLogEntry(IcebergBaseModel): + """A log entry of a view version change.""" + + timestamp_ms: int = Field(alias="timestamp-ms") + """Timestamp when the version was created (ms from epoch)""" + version_id: int = Field(alias="version-id") + """ID for the version""" + + +class ViewMetadata(IcebergBaseModel): + """The metadata for a view.""" + + view_uuid: str = Field(alias="view-uuid") + """A UUID that identifies the view, generated when the view is created.""" + format_version: Literal[1] = Field(alias="format-version") + """An integer version number for the view format; must be 1""" + location: str = Field() + """The view's base location; used to create metadata file locations""" + schemas: List[Schema] = Field() + """A list of known schemas""" + current_version_id: int = Field(alias="current-version-id") + """ID of the current version of the view (version-id)""" + versions: List[ViewVersion] = Field() + """A list of known versions of the view""" + version_log: List[ViewVersionLogEntry] = Field(alias="version-log") + """A list of version log entries""" + properties: Properties = Field(default_factory=dict) + """A string to string map of view properties""" + + @field_validator("properties", mode="before") + def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: + return transform_dict_value_to_str(properties) \ No newline at end of file From 01ddfacba2fbe3b06d7d93e3c4a096a2ccfa6271 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 26 Jun 2025 11:33:03 -0700 Subject: [PATCH 02/18] Added tests and View class. --- pyiceberg/catalog/rest/__init__.py | 73 ++++++++++++++++++++++++++- pyiceberg/exceptions.py | 3 ++ pyiceberg/view/__init__.py | 57 +++++++++++++++++++++ tests/catalog/test_rest.py | 81 ++++++++++++++++++++++++++++++ 4 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 pyiceberg/view/__init__.py diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 0972d7792f..cb0ab8c0ef 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -53,6 +53,7 @@ NoSuchViewError, TableAlreadyExistsError, UnauthorizedError, + ViewAlreadyExistsError, ) from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec, assign_fresh_partition_spec_ids @@ -65,7 +66,7 @@ Table, TableIdentifier, ) -from pyiceberg.table.metadata import TableMetadata +from pyiceberg.table.metadata import TableMetadata, ViewMetadata, ViewVersion from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder, assign_fresh_sort_order_ids from pyiceberg.table.update import ( TableRequirement, @@ -75,6 +76,7 @@ from pyiceberg.types import transform_dict_value_to_str from pyiceberg.utils.deprecated import deprecation_message from pyiceberg.utils.properties import get_first_property_value, get_header_properties, property_as_bool +from pyiceberg.view import View if TYPE_CHECKING: import pyarrow as pa @@ -100,6 +102,7 @@ class Endpoints: get_token: str = "oauth/tokens" rename_table: str = "tables/rename" list_views: str = "namespaces/{namespace}/views" + create_view: str = "namespaces/{namespace}/views" drop_view: str = "namespaces/{namespace}/views/{view}" view_exists: str = "namespaces/{namespace}/views/{view}" @@ -157,6 +160,12 @@ class TableResponse(IcebergBaseModel): config: Properties = Field(default_factory=dict) +class ViewResponse(IcebergBaseModel): + metadata_location: Optional[str] = Field(alias="metadata-location", default=None) + metadata: ViewMetadata + config: Properties = Field(default_factory=dict) + + class CreateTableRequest(IcebergBaseModel): name: str = Field() location: Optional[str] = Field() @@ -172,6 +181,19 @@ def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[ return transform_dict_value_to_str(properties) +class CreateViewRequest(IcebergBaseModel): + name: str = Field() + location: Optional[str] = Field() + view_schema: Schema = Field(alias="schema") + view_version: ViewVersion = Field(alias="view-version") + properties: Dict[str, str] = Field(default_factory=dict) + + # validators + @field_validator("properties", mode="before") + def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: + return transform_dict_value_to_str(properties) + + class RegisterTableRequest(IcebergBaseModel): name: str metadata_location: str = Field(..., alias="metadata-location") @@ -454,6 +476,17 @@ def _response_to_staged_table(self, identifier_tuple: Tuple[str, ...], table_res catalog=self, ) + def _response_to_view(self, identifier_tuple: Tuple[str, ...], view_response: ViewResponse) -> View: + return View( + identifier=identifier_tuple, + metadata_location=view_response.metadata_location, # type: ignore + metadata=view_response.metadata, + io=self._load_file_io( + {**view_response.metadata.properties, **view_response.config}, view_response.metadata_location + ), + catalog=self, + ) + def _refresh_token(self) -> None: # Reactive token refresh is atypical - we should proactively refresh tokens in a separate thread # instead of retrying on Auth Exceptions. Keeping refresh behavior for the LegacyOAuth2AuthManager @@ -551,6 +584,44 @@ def create_table_transaction( staged_table = self._response_to_staged_table(self.identifier_to_tuple(identifier), table_response) return CreateTableTransaction(staged_table) + @retry(**_RETRY_ARGS) + def create_view( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + view_version: ViewVersion, + location: Optional[str] = None, + properties: Properties = EMPTY_DICT, + ) -> View: + iceberg_schema = self._convert_schema_if_needed(schema) + fresh_schema = assign_fresh_schema_ids(iceberg_schema) + + namespace_and_view = self._split_identifier_for_path(identifier, IdentifierKind.VIEW) + if location: + location = location.rstrip("/") + + request = CreateViewRequest( + name=namespace_and_view["view"], + location=location, + view_schema=fresh_schema, + view_version=view_version, + properties=properties, + ) + + serialized_json = request.model_dump_json().encode(UTF8) + response = self._session.post( + self.url(Endpoints.create_view, namespace=namespace_and_view["namespace"]), + data=serialized_json, + ) + + try: + response.raise_for_status() + except HTTPError as exc: + _handle_non_200_response(exc, {409: ViewAlreadyExistsError}) + + view_response = ViewResponse.model_validate_json(response.text) + return self._response_to_view(self.identifier_to_tuple(identifier), view_response) + @retry(**_RETRY_ARGS) def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: """Register a new table using existing metadata. diff --git a/pyiceberg/exceptions.py b/pyiceberg/exceptions.py index c80f104e46..f8ca4effe8 100644 --- a/pyiceberg/exceptions.py +++ b/pyiceberg/exceptions.py @@ -19,6 +19,9 @@ class TableAlreadyExistsError(Exception): """Raised when creating a table with a name that already exists.""" +class ViewAlreadyExistsError(Exception): + """Raised when creating a view with a name that already exists.""" + class NamespaceNotEmptyError(Exception): """Raised when a name-space being dropped is not empty.""" diff --git a/pyiceberg/view/__init__.py b/pyiceberg/view/__init__.py new file mode 100644 index 0000000000..ee345865d3 --- /dev/null +++ b/pyiceberg/view/__init__.py @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Dict, +) + +from pydantic import Field + +from pyiceberg.io import FileIO +from pyiceberg.table.metadata import ViewMetadata +from pyiceberg.typedef import EMPTY_DICT, Identifier + +class View: + """An Iceberg view.""" + + _identifier: Identifier = Field() + metadata: ViewMetadata + metadata_location: str = Field() + io: FileIO + + def __init__( + self, + identifier: Identifier, + metadata: ViewMetadata, + ) -> None: + self._identifier = identifier + self.metadata = metadata + + def name(self) -> Identifier: + """Return the identifier of this view.""" + return self._identifier + + def __eq__(self, other: Any) -> bool: + """Return the equality of two instances of the View class.""" + return ( + self.name() == other.name() and self.metadata == other.metadata + if isinstance(other, View) + else False + ) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index ed91dd15a1..1798b841b4 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -36,12 +36,14 @@ OAuthError, ServerError, TableAlreadyExistsError, + ViewAlreadyExistsError, ) from pyiceberg.io import load_file_io from pyiceberg.partitioning import PartitionField, PartitionSpec from pyiceberg.schema import Schema from pyiceberg.table import Table from pyiceberg.table.metadata import TableMetadataV1 +from pyiceberg.table.view import View, ViewMetadata, ViewVersion from pyiceberg.table.sorting import SortField, SortOrder from pyiceberg.transforms import IdentityTransform, TruncateTransform from pyiceberg.typedef import RecursiveDict @@ -102,6 +104,18 @@ def example_table_metadata_no_snapshot_v1_rest_json(example_table_metadata_no_sn } +@pytest.fixture +def example_view_metadata_rest_json(example_view_metadata_v1: Dict[str, Any]) -> Dict[str, Any]: + return { + "metadata-location": "s3://warehouse/database/table/metadata/00001-5f2f8166-244c-4eae-ac36-384ecdec81fc.gz.metadata.json", + "metadata": example_view_metadata_v1, + "config": { + "client.factory": "io.tabular.iceberg.catalog.TabularAwsClientFactory", + "region": "us-west-2", + }, + } + + @pytest.fixture def rest_mock(requests_mock: Mocker) -> Mocker: """Takes the default requests_mock and adds the config endpoint to it @@ -1162,6 +1176,73 @@ def test_create_table_409(rest_mock: Mocker, table_schema_simple: Schema) -> Non assert "Table already exists" in str(e.value) +def test_create_view_200( + rest_mock: Mocker, table_schema_simple: Schema, example_view_metadata_rest_json: Dict[str, Any] +) -> None: + rest_mock.post( + f"{TEST_URI}v1/namespaces/fokko/views", + json=example_view_metadata_rest_json, + status_code=200, + request_headers=TEST_HEADERS, + ) + catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN) + actual = catalog.create_view( + identifier=("fokko", "fokko2"), + schema=table_schema_simple, + view_version=ViewVersion( + version_id=1, + timestamp_ms=12345, + schema_id=1, + summary={"engine-name": "spark", "engineVersion": "3.3"}, + representations=[], + ), + location=None, + properties={"owner": "fokko"}, + ) + expected = View( + identifier=("fokko", "fokko2"), + metadata_location=example_view_metadata_rest_json["metadata-location"], + metadata=ViewMetadata(**example_view_metadata_rest_json["metadata"]), + io=load_file_io(), + catalog=catalog, + ) + assert actual == expected + + +def test_create_view_409( + rest_mock: Mocker, + table_schema_simple: Schema, +) -> None: + rest_mock.post( + f"{TEST_URI}v1/namespaces/fokko/views", + json={ + "error": { + "message": "View already exists: fokko.already_exists in warehouse 8bcb0838-50fc-472d-9ddb-8feb89ef5f1e", + "type": "AlreadyExistsException", + "code": 409, + } + }, + status_code=409, + request_headers=TEST_HEADERS, + ) + + with pytest.raises(ViewAlreadyExistsError) as e: + RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).create_view( + identifier=("fokko", "fokko2"), + schema=table_schema_simple, + view_version=ViewVersion( + version_id=1, + timestamp_ms=12345, + schema_id=1, + summary={"engine-name": "spark", "engineVersion": "3.3"}, + representations=[], + ), + location=None, + properties={"owner": "fokko"}, + ) + assert "View already exists" in str(e.value) + + def test_create_table_if_not_exists_200( rest_mock: Mocker, table_schema_simple: Schema, example_table_metadata_no_snapshot_v1_rest_json: Dict[str, Any] ) -> None: From e248b148fb794051134adcc71b5bf605d865475f Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 26 Jun 2025 13:21:22 -0700 Subject: [PATCH 03/18] move view metadata over to separate folder --- pyiceberg/catalog/rest/__init__.py | 7 +-- pyiceberg/exceptions.py | 1 + pyiceberg/table/metadata.py | 63 --------------------- pyiceberg/view/__init__.py | 16 ++---- pyiceberg/view/metadata.py | 89 ++++++++++++++++++++++++++++++ tests/catalog/test_rest.py | 10 +--- 6 files changed, 100 insertions(+), 86 deletions(-) create mode 100644 pyiceberg/view/metadata.py diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index cb0ab8c0ef..a0f6acdeb9 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -66,7 +66,7 @@ Table, TableIdentifier, ) -from pyiceberg.table.metadata import TableMetadata, ViewMetadata, ViewVersion +from pyiceberg.table.metadata import TableMetadata from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder, assign_fresh_sort_order_ids from pyiceberg.table.update import ( TableRequirement, @@ -77,6 +77,7 @@ from pyiceberg.utils.deprecated import deprecation_message from pyiceberg.utils.properties import get_first_property_value, get_header_properties, property_as_bool from pyiceberg.view import View +from pyiceberg.view.metadata import ViewMetadata, ViewVersion if TYPE_CHECKING: import pyarrow as pa @@ -481,9 +482,7 @@ def _response_to_view(self, identifier_tuple: Tuple[str, ...], view_response: Vi identifier=identifier_tuple, metadata_location=view_response.metadata_location, # type: ignore metadata=view_response.metadata, - io=self._load_file_io( - {**view_response.metadata.properties, **view_response.config}, view_response.metadata_location - ), + io=self._load_file_io({**view_response.metadata.properties, **view_response.config}, view_response.metadata_location), catalog=self, ) diff --git a/pyiceberg/exceptions.py b/pyiceberg/exceptions.py index f8ca4effe8..cec47e5911 100644 --- a/pyiceberg/exceptions.py +++ b/pyiceberg/exceptions.py @@ -19,6 +19,7 @@ class TableAlreadyExistsError(Exception): """Raised when creating a table with a name that already exists.""" + class ViewAlreadyExistsError(Exception): """Raised when creating a view with a name that already exists.""" diff --git a/pyiceberg/table/metadata.py b/pyiceberg/table/metadata.py index b65639533c..9c2ae29cdd 100644 --- a/pyiceberg/table/metadata.py +++ b/pyiceberg/table/metadata.py @@ -684,66 +684,3 @@ def _construct_without_validation(table_metadata: TableMetadata) -> TableMetadat return TableMetadataV3.model_construct(**dict(table_metadata)) else: raise ValidationError(f"Unknown format version: {table_metadata.format_version}") - -class ViewRepresentation(IcebergBaseModel): - """Represents the SQL query that defines the view.""" - - type: Literal["sql"] = Field() - """A string that indicates the type of representation. Must be `sql`""" - sql: str = Field() - """A string that contains the SQL text of the view definition.""" - dialect: str = Field() - """The dialect of the SQL, e.g. `spark`, `trino`, `presto`.""" - - -class ViewVersion(IcebergBaseModel): - """A version of the view definition.""" - - version_id: int = Field(alias="version-id") - """ID for the version""" - schema_id: int = Field(alias="schema-id") - """ID of the schema for the view version""" - timestamp_ms: int = Field(alias="timestamp-ms") - """Timestamp when the version was created (ms from epoch)""" - summary: Dict[str, str] = Field() - """A string to string map of summary metadata about the version""" - representations: List[ViewRepresentation] = Field() - """A list of representations for the view definition""" - default_catalog: Optional[str] = Field(alias="default-catalog", default=None) - """Catalog name to use when a reference in the SELECT does not contain a catalog""" - default_namespace: List[str] = Field(alias="default-namespace") - """Namespace to use when a reference in the SELECT is a single identifier""" - - -class ViewVersionLogEntry(IcebergBaseModel): - """A log entry of a view version change.""" - - timestamp_ms: int = Field(alias="timestamp-ms") - """Timestamp when the version was created (ms from epoch)""" - version_id: int = Field(alias="version-id") - """ID for the version""" - - -class ViewMetadata(IcebergBaseModel): - """The metadata for a view.""" - - view_uuid: str = Field(alias="view-uuid") - """A UUID that identifies the view, generated when the view is created.""" - format_version: Literal[1] = Field(alias="format-version") - """An integer version number for the view format; must be 1""" - location: str = Field() - """The view's base location; used to create metadata file locations""" - schemas: List[Schema] = Field() - """A list of known schemas""" - current_version_id: int = Field(alias="current-version-id") - """ID of the current version of the view (version-id)""" - versions: List[ViewVersion] = Field() - """A list of known versions of the view""" - version_log: List[ViewVersionLogEntry] = Field(alias="version-log") - """A list of version log entries""" - properties: Properties = Field(default_factory=dict) - """A string to string map of view properties""" - - @field_validator("properties", mode="before") - def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: - return transform_dict_value_to_str(properties) \ No newline at end of file diff --git a/pyiceberg/view/__init__.py b/pyiceberg/view/__init__.py index ee345865d3..d65fc6b114 100644 --- a/pyiceberg/view/__init__.py +++ b/pyiceberg/view/__init__.py @@ -17,24 +17,20 @@ from __future__ import annotations from typing import ( - TYPE_CHECKING, Any, - Dict, ) from pydantic import Field -from pyiceberg.io import FileIO -from pyiceberg.table.metadata import ViewMetadata -from pyiceberg.typedef import EMPTY_DICT, Identifier +from pyiceberg.typedef import Identifier +from pyiceberg.view.metadata import ViewMetadata + class View: """An Iceberg view.""" _identifier: Identifier = Field() metadata: ViewMetadata - metadata_location: str = Field() - io: FileIO def __init__( self, @@ -50,8 +46,4 @@ def name(self) -> Identifier: def __eq__(self, other: Any) -> bool: """Return the equality of two instances of the View class.""" - return ( - self.name() == other.name() and self.metadata == other.metadata - if isinstance(other, View) - else False - ) + return self.name() == other.name() and self.metadata == other.metadata if isinstance(other, View) else False diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py new file mode 100644 index 0000000000..af331eb7a8 --- /dev/null +++ b/pyiceberg/view/metadata.py @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from typing import Dict, List, Literal, Optional + +from pydantic import Field, field_validator + +from pyiceberg.schema import Schema +from pyiceberg.typedef import IcebergBaseModel, Properties +from pyiceberg.types import transform_dict_value_to_str + + +class ViewRepresentation(IcebergBaseModel): + """Represents the SQL query that defines the view.""" + + type: Literal["sql"] = Field() + """A string that indicates the type of representation. Must be `sql`""" + sql: str = Field() + """A string that contains the SQL text of the view definition.""" + dialect: str = Field() + """The dialect of the SQL, e.g. `spark`, `trino`, `presto`.""" + + +class ViewVersion(IcebergBaseModel): + """A version of the view definition.""" + + version_id: int = Field(alias="version-id") + """ID for the version""" + schema_id: int = Field(alias="schema-id") + """ID of the schema for the view version""" + timestamp_ms: int = Field(alias="timestamp-ms") + """Timestamp when the version was created (ms from epoch)""" + summary: Dict[str, str] = Field() + """A string to string map of summary metadata about the version""" + representations: List[ViewRepresentation] = Field() + """A list of representations for the view definition""" + default_catalog: Optional[str] = Field(alias="default-catalog", default=None) + """Catalog name to use when a reference in the SELECT does not contain a catalog""" + default_namespace: List[str] = Field(alias="default-namespace") + """Namespace to use when a reference in the SELECT is a single identifier""" + + +class ViewVersionLogEntry(IcebergBaseModel): + """A log entry of a view version change.""" + + timestamp_ms: int = Field(alias="timestamp-ms") + """Timestamp when the version was created (ms from epoch)""" + version_id: int = Field(alias="version-id") + """ID for the version""" + + +class ViewMetadata(IcebergBaseModel): + """The metadata for a view.""" + + view_uuid: str = Field(alias="view-uuid") + """A UUID that identifies the view, generated when the view is created.""" + format_version: Literal[1] = Field(alias="format-version") + """An integer version number for the view format; must be 1""" + location: str = Field() + """The view's base location; used to create metadata file locations""" + schemas: List[Schema] = Field() + """A list of known schemas""" + current_version_id: int = Field(alias="current-version-id") + """ID of the current version of the view (version-id)""" + versions: List[ViewVersion] = Field() + """A list of known versions of the view""" + version_log: List[ViewVersionLogEntry] = Field(alias="version-log") + """A list of version log entries""" + properties: Properties = Field(default_factory=dict) + """A string to string map of view properties""" + + @field_validator("properties", mode="before") + def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: + return transform_dict_value_to_str(properties) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 1798b841b4..82190e476c 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -43,11 +43,12 @@ from pyiceberg.schema import Schema from pyiceberg.table import Table from pyiceberg.table.metadata import TableMetadataV1 -from pyiceberg.table.view import View, ViewMetadata, ViewVersion from pyiceberg.table.sorting import SortField, SortOrder from pyiceberg.transforms import IdentityTransform, TruncateTransform from pyiceberg.typedef import RecursiveDict from pyiceberg.utils.config import Config +from pyiceberg.view import View +from pyiceberg.view.metadata import ViewMetadata, ViewVersion TEST_URI = "https://iceberg-test-catalog/" TEST_CREDENTIALS = "client:secret" @@ -1176,9 +1177,7 @@ def test_create_table_409(rest_mock: Mocker, table_schema_simple: Schema) -> Non assert "Table already exists" in str(e.value) -def test_create_view_200( - rest_mock: Mocker, table_schema_simple: Schema, example_view_metadata_rest_json: Dict[str, Any] -) -> None: +def test_create_view_200(rest_mock: Mocker, table_schema_simple: Schema, example_view_metadata_rest_json: Dict[str, Any]) -> None: rest_mock.post( f"{TEST_URI}v1/namespaces/fokko/views", json=example_view_metadata_rest_json, @@ -1201,10 +1200,7 @@ def test_create_view_200( ) expected = View( identifier=("fokko", "fokko2"), - metadata_location=example_view_metadata_rest_json["metadata-location"], metadata=ViewMetadata(**example_view_metadata_rest_json["metadata"]), - io=load_file_io(), - catalog=catalog, ) assert actual == expected From 0238e5b604eb59e2cc745b54f0797988f6b8bb58 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 26 Jun 2025 13:36:01 -0700 Subject: [PATCH 04/18] tests passing --- pyiceberg/catalog/rest/__init__.py | 3 --- tests/catalog/test_rest.py | 10 +++++++- tests/conftest.py | 39 ++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index a0f6acdeb9..fffce16fd1 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -480,10 +480,7 @@ def _response_to_staged_table(self, identifier_tuple: Tuple[str, ...], table_res def _response_to_view(self, identifier_tuple: Tuple[str, ...], view_response: ViewResponse) -> View: return View( identifier=identifier_tuple, - metadata_location=view_response.metadata_location, # type: ignore metadata=view_response.metadata, - io=self._load_file_io({**view_response.metadata.properties, **view_response.config}, view_response.metadata_location), - catalog=self, ) def _refresh_token(self) -> None: diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 82190e476c..08ae47b3cc 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -1193,7 +1193,14 @@ def test_create_view_200(rest_mock: Mocker, table_schema_simple: Schema, example timestamp_ms=12345, schema_id=1, summary={"engine-name": "spark", "engineVersion": "3.3"}, - representations=[], + representations=[ + { + "type": "sql", + "sql": "SELECT * FROM prod.db.table", + "dialect": "spark", + } + ], + default_namespace=["default"], ), location=None, properties={"owner": "fokko"}, @@ -1232,6 +1239,7 @@ def test_create_view_409( schema_id=1, summary={"engine-name": "spark", "engineVersion": "3.3"}, representations=[], + default_namespace=[], ), location=None, properties={"owner": "fokko"}, diff --git a/tests/conftest.py b/tests/conftest.py index 584b6c633a..f74676e965 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1127,6 +1127,45 @@ def table_metadata_v2_with_statistics() -> Dict[str, Any]: return TABLE_METADATA_V2_WITH_STATISTICS +@pytest.fixture +def example_view_metadata_v1() -> Dict[str, Any]: + return { + "view-uuid": "a20125c8-7284-442c-9aea-15fee620737c", + "format-version": 1, + "location": "s3://bucket/test/location/test_view", + "current-version-id": 1, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1602638573874, + "schema-id": 1, + "summary": {"engine-name": "spark", "engineVersion": "3.3"}, + "representations": [ + { + "type": "sql", + "sql": "SELECT * FROM prod.db.table", + "dialect": "spark", + } + ], + "default-namespace": ["default"], + } + ], + "schemas": [ + { + "type": "struct", + "schema-id": 1, + "fields": [ + {"id": 1, "name": "x", "required": True, "type": "long"}, + {"id": 2, "name": "y", "required": True, "type": "long", "doc": "comment"}, + {"id": 3, "name": "z", "required": True, "type": "long"}, + ], + } + ], + "version-log": [{"timestamp-ms": 1602638573874, "version-id": 1}], + "properties": {"comment": "this is a test view"}, + } + + @pytest.fixture def example_table_metadata_v3() -> Dict[str, Any]: return EXAMPLE_TABLE_METADATA_V3 From f22bd94ca2f566f74107f664ad9cc3ed68153650 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 26 Jun 2025 13:38:15 -0700 Subject: [PATCH 05/18] remove stray comment --- pyiceberg/catalog/rest/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index fffce16fd1..520917101b 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -189,7 +189,6 @@ class CreateViewRequest(IcebergBaseModel): view_version: ViewVersion = Field(alias="view-version") properties: Dict[str, str] = Field(default_factory=dict) - # validators @field_validator("properties", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: return transform_dict_value_to_str(properties) From 4bde3a9bdee247a6cb651381b8e18bb69e004db9 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Thu, 26 Jun 2025 13:51:33 -0700 Subject: [PATCH 06/18] Add abstract method --- pyiceberg/catalog/__init__.py | 28 ++++++++++++++++++++++++++++ pyiceberg/catalog/dynamodb.py | 12 ++++++++++++ pyiceberg/catalog/glue.py | 12 ++++++++++++ pyiceberg/catalog/hive.py | 12 ++++++++++++ pyiceberg/catalog/noop.py | 12 ++++++++++++ pyiceberg/catalog/sql.py | 12 ++++++++++++ 6 files changed, 88 insertions(+) diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 4da116434e..8752eafdcc 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -73,6 +73,8 @@ ) from pyiceberg.utils.config import Config, merge_config from pyiceberg.utils.properties import property_as_bool +from pyiceberg.view import View +from pyiceberg.view.metadata import ViewVersion if TYPE_CHECKING: import pyarrow as pa @@ -657,6 +659,32 @@ def drop_view(self, identifier: Union[str, Identifier]) -> None: NoSuchViewError: If a view with the given name does not exist. """ + @abstractmethod + def create_view( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + view_version: ViewVersion, + location: Optional[str] = None, + properties: Properties = EMPTY_DICT, + ) -> View: + """Create a view. + + Args: + identifier (str | Identifier): View identifier. + schema (Schema): View's schema. + location (str | None): Location for the view. Optional Argument. + partition_spec (PartitionSpec): PartitionSpec for the view. + sort_order (SortOrder): SortOrder for the view. + properties (Properties): View properties that can be a string based dictionary. + + Returns: + View: the created view instance. + + Raises: + ViewAlreadyExistsError: If a view with the name already exists. + """ + @staticmethod def identifier_to_tuple(identifier: Union[str, Identifier]) -> Identifier: """Parse an identifier to a tuple. diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py index 3b37762638..e441fd9806 100644 --- a/pyiceberg/catalog/dynamodb.py +++ b/pyiceberg/catalog/dynamodb.py @@ -63,6 +63,8 @@ ) from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties from pyiceberg.utils.properties import get_first_property_value +from pyiceberg.view import View +from pyiceberg.view.metadata import ViewVersion if TYPE_CHECKING: import pyarrow as pa @@ -538,6 +540,16 @@ def update_namespace_properties( return properties_update_summary + def create_view( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + view_version: ViewVersion, + location: Optional[str] = None, + properties: Properties = EMPTY_DICT, + ) -> View: + raise NotImplementedError + def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: raise NotImplementedError diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 4eb4164e57..6a8a967044 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -89,6 +89,8 @@ UUIDType, ) from pyiceberg.utils.properties import get_first_property_value, property_as_bool +from pyiceberg.view import View +from pyiceberg.view.metadata import ViewVersion if TYPE_CHECKING: import pyarrow as pa @@ -817,6 +819,16 @@ def update_namespace_properties( return properties_update_summary + def create_view( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + view_version: ViewVersion, + location: Optional[str] = None, + properties: Properties = EMPTY_DICT, + ) -> View: + raise NotImplementedError + def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: raise NotImplementedError diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index eef6bbad18..b52b318695 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -116,6 +116,8 @@ UUIDType, ) from pyiceberg.utils.properties import property_as_bool, property_as_float +from pyiceberg.view import View +from pyiceberg.view.metadata import ViewVersion if TYPE_CHECKING: import pyarrow as pa @@ -439,6 +441,16 @@ def create_table( return self._convert_hive_into_iceberg(hive_table) + def create_view( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + view_version: ViewVersion, + location: Optional[str] = None, + properties: Properties = EMPTY_DICT, + ) -> View: + raise NotImplementedError + def register_table(self, identifier: Union[str, Identifier], metadata_location: str) -> Table: """Register a new table using existing metadata. diff --git a/pyiceberg/catalog/noop.py b/pyiceberg/catalog/noop.py index 39a0b382a7..fb1dc77e6a 100644 --- a/pyiceberg/catalog/noop.py +++ b/pyiceberg/catalog/noop.py @@ -37,6 +37,8 @@ TableUpdate, ) from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties +from pyiceberg.view import View +from pyiceberg.view.metadata import ViewVersion if TYPE_CHECKING: import pyarrow as pa @@ -126,5 +128,15 @@ def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: def view_exists(self, identifier: Union[str, Identifier]) -> bool: raise NotImplementedError + def create_view( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + view_version: ViewVersion, + location: Optional[str] = None, + properties: Properties = EMPTY_DICT, + ) -> View: + raise NotImplementedError + def drop_view(self, identifier: Union[str, Identifier]) -> None: raise NotImplementedError diff --git a/pyiceberg/catalog/sql.py b/pyiceberg/catalog/sql.py index 880a4db481..cf7fa97780 100644 --- a/pyiceberg/catalog/sql.py +++ b/pyiceberg/catalog/sql.py @@ -72,6 +72,8 @@ ) from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties from pyiceberg.types import strtobool +from pyiceberg.view import View +from pyiceberg.view.metadata import ViewVersion if TYPE_CHECKING: import pyarrow as pa @@ -722,6 +724,16 @@ def update_namespace_properties( session.commit() return properties_update_summary + def create_view( + self, + identifier: Union[str, Identifier], + schema: Union[Schema, "pa.Schema"], + view_version: ViewVersion, + location: Optional[str] = None, + properties: Properties = EMPTY_DICT, + ) -> View: + raise NotImplementedError + def list_views(self, namespace: Union[str, Identifier]) -> List[Identifier]: raise NotImplementedError From ec107b118116c8d98554a34e1902d0d329e6561e Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Sat, 28 Jun 2025 16:57:20 -0700 Subject: [PATCH 07/18] Add renaming --- pyiceberg/view/metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index af331eb7a8..af53f2de00 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -25,7 +25,7 @@ from pyiceberg.types import transform_dict_value_to_str -class ViewRepresentation(IcebergBaseModel): +class SQLViewRepresentation(IcebergBaseModel): """Represents the SQL query that defines the view.""" type: Literal["sql"] = Field() @@ -47,7 +47,7 @@ class ViewVersion(IcebergBaseModel): """Timestamp when the version was created (ms from epoch)""" summary: Dict[str, str] = Field() """A string to string map of summary metadata about the version""" - representations: List[ViewRepresentation] = Field() + representations: List[SQLViewRepresentation] = Field() """A list of representations for the view definition""" default_catalog: Optional[str] = Field(alias="default-catalog", default=None) """Catalog name to use when a reference in the SELECT does not contain a catalog""" From 9c8a2a1454313393d5bf4fd23b70791802a3f3d1 Mon Sep 17 00:00:00 2001 From: Alex Stephen <1325798+rambleraptor@users.noreply.github.com> Date: Wed, 2 Jul 2025 11:00:12 -0700 Subject: [PATCH 08/18] Update pyiceberg/view/metadata.py Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> --- pyiceberg/view/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index af53f2de00..3397009357 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -51,7 +51,7 @@ class ViewVersion(IcebergBaseModel): """A list of representations for the view definition""" default_catalog: Optional[str] = Field(alias="default-catalog", default=None) """Catalog name to use when a reference in the SELECT does not contain a catalog""" - default_namespace: List[str] = Field(alias="default-namespace") + default_namespace: Namespace = Field(alias="default-namespace") """Namespace to use when a reference in the SELECT is a single identifier""" From 4248b0dfd0e19924f9c09093c5c88c1a51ae1645 Mon Sep 17 00:00:00 2001 From: Alex Stephen <1325798+rambleraptor@users.noreply.github.com> Date: Wed, 2 Jul 2025 11:00:18 -0700 Subject: [PATCH 09/18] Update pyiceberg/view/metadata.py Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> --- pyiceberg/view/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index 3397009357..65fc7b459b 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -69,7 +69,7 @@ class ViewMetadata(IcebergBaseModel): view_uuid: str = Field(alias="view-uuid") """A UUID that identifies the view, generated when the view is created.""" - format_version: Literal[1] = Field(alias="format-version") + format_version: int = Field(alias='format-version', ge=1, le=1) """An integer version number for the view format; must be 1""" location: str = Field() """The view's base location; used to create metadata file locations""" From 8fb230cd9bd2823e4d415a4a165f1e6f29ca3f9d Mon Sep 17 00:00:00 2001 From: Alex Stephen <1325798+rambleraptor@users.noreply.github.com> Date: Wed, 2 Jul 2025 11:00:27 -0700 Subject: [PATCH 10/18] Update pyiceberg/catalog/rest/__init__.py Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> --- pyiceberg/catalog/rest/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 520917101b..e33f2de075 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -187,7 +187,7 @@ class CreateViewRequest(IcebergBaseModel): location: Optional[str] = Field() view_schema: Schema = Field(alias="schema") view_version: ViewVersion = Field(alias="view-version") - properties: Dict[str, str] = Field(default_factory=dict) + properties: Properties = Field(default_factory=dict) @field_validator("properties", mode="before") def transform_properties_dict_value_to_str(cls, properties: Properties) -> Dict[str, str]: From ba6527db75dd3c3e980e56a27e1b6782e87db0a9 Mon Sep 17 00:00:00 2001 From: Alex Stephen <1325798+rambleraptor@users.noreply.github.com> Date: Wed, 2 Jul 2025 11:00:33 -0700 Subject: [PATCH 11/18] Update pyiceberg/view/metadata.py Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> --- pyiceberg/view/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index 65fc7b459b..877a01d397 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -55,7 +55,7 @@ class ViewVersion(IcebergBaseModel): """Namespace to use when a reference in the SELECT is a single identifier""" -class ViewVersionLogEntry(IcebergBaseModel): +class ViewHistoryEntry(IcebergBaseModel): """A log entry of a view version change.""" timestamp_ms: int = Field(alias="timestamp-ms") From 151f18f6167d26772881db6f1d5919dafb169ac0 Mon Sep 17 00:00:00 2001 From: Alex Stephen <1325798+rambleraptor@users.noreply.github.com> Date: Wed, 2 Jul 2025 11:02:25 -0700 Subject: [PATCH 12/18] Update pyiceberg/view/metadata.py Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> --- pyiceberg/view/metadata.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index 877a01d397..ec8ef70248 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -36,6 +36,10 @@ class SQLViewRepresentation(IcebergBaseModel): """The dialect of the SQL, e.g. `spark`, `trino`, `presto`.""" +class ViewRepresentation(BaseModel): + __root__: SQLViewRepresentation + + class ViewVersion(IcebergBaseModel): """A version of the view definition.""" From 277e7407d9e27e5b2b930cff76c4bc532eef51db Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 11:36:48 -0700 Subject: [PATCH 13/18] adding test --- pyiceberg/view/metadata.py | 18 ++++----- tests/integration/test_writes/test_writes.py | 40 ++++++++++++++++++++ 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index ec8ef70248..88c0b3ae27 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -16,12 +16,12 @@ # under the License. from __future__ import annotations -from typing import Dict, List, Literal, Optional +from typing import Dict, List, Literal, Optional, Union from pydantic import Field, field_validator from pyiceberg.schema import Schema -from pyiceberg.typedef import IcebergBaseModel, Properties +from pyiceberg.typedef import IcebergBaseModel, Identifier, Properties from pyiceberg.types import transform_dict_value_to_str @@ -36,10 +36,10 @@ class SQLViewRepresentation(IcebergBaseModel): """The dialect of the SQL, e.g. `spark`, `trino`, `presto`.""" -class ViewRepresentation(BaseModel): +class ViewRepresentation(IcebergBaseModel): __root__: SQLViewRepresentation - - + + class ViewVersion(IcebergBaseModel): """A version of the view definition.""" @@ -51,11 +51,11 @@ class ViewVersion(IcebergBaseModel): """Timestamp when the version was created (ms from epoch)""" summary: Dict[str, str] = Field() """A string to string map of summary metadata about the version""" - representations: List[SQLViewRepresentation] = Field() + representations: List[ViewRepresentation] = Field() """A list of representations for the view definition""" default_catalog: Optional[str] = Field(alias="default-catalog", default=None) """Catalog name to use when a reference in the SELECT does not contain a catalog""" - default_namespace: Namespace = Field(alias="default-namespace") + default_namespace: Union[str, Identifier] = Field(alias="default-namespace") """Namespace to use when a reference in the SELECT is a single identifier""" @@ -73,7 +73,7 @@ class ViewMetadata(IcebergBaseModel): view_uuid: str = Field(alias="view-uuid") """A UUID that identifies the view, generated when the view is created.""" - format_version: int = Field(alias='format-version', ge=1, le=1) + format_version: int = Field(alias="format-version", ge=1, le=1) """An integer version number for the view format; must be 1""" location: str = Field() """The view's base location; used to create metadata file locations""" @@ -83,7 +83,7 @@ class ViewMetadata(IcebergBaseModel): """ID of the current version of the view (version-id)""" versions: List[ViewVersion] = Field() """A list of known versions of the view""" - version_log: List[ViewVersionLogEntry] = Field(alias="version-log") + version_log: List[ViewHistoryEntry] = Field(alias="version-log") """A list of version log entries""" properties: Properties = Field(default_factory=dict) """A string to string map of view properties""" diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index e63883c1db..35b98734fb 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -62,6 +62,7 @@ StringType, UUIDType, ) +from pyiceberg.view.metadata import SQLViewRepresentation, ViewVersion from utils import _create_table @@ -1551,6 +1552,45 @@ def test_table_v1_with_null_nested_namespace(session_catalog: Catalog, arrow_tab session_catalog.drop_table(identifier) +@pytest.mark.integration +def test_create_view( + spark: SparkSession, + session_catalog: Catalog, +) -> None: + # Create a view using the REST Catalog. + identifier = "default.some_view" + schema = pa.schema([pa.field("some_col", pa.int32())]) + view_version = ViewVersion( + version_id=1, + schema_id=1, + timestamp_ms=int(time.time() * 1000), + summary={}, + representations=[ + SQLViewRepresentation( + type="sql", + sql="SELECT 1 as some_col", + dialect="spark", + ) + ], + default_namespace="default", + ) + session_catalog.create_view( + identifier=identifier, + schema=schema, + view_version=view_version, + ) + + # Ensure the view exists. + assert session_catalog.view_exists(identifier) + + # Query the view in spark to ensure it was properly created. + df = spark.table(identifier) + assert df.count() == 1 + assert df.collect()[0].some_col == 1 + + session_catalog.drop_view(identifier) # clean up + + @pytest.mark.integration def test_view_exists( spark: SparkSession, From 5e62db6e1260ff072449c03d779b9200a9bfbfcb Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 13:32:53 -0700 Subject: [PATCH 14/18] add test and fix --- pyiceberg/view/metadata.py | 8 ++++---- tests/integration/test_writes/test_writes.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index 88c0b3ae27..44aedec451 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -18,7 +18,7 @@ from typing import Dict, List, Literal, Optional, Union -from pydantic import Field, field_validator +from pydantic import Field, RootModel, field_validator from pyiceberg.schema import Schema from pyiceberg.typedef import IcebergBaseModel, Identifier, Properties @@ -36,8 +36,8 @@ class SQLViewRepresentation(IcebergBaseModel): """The dialect of the SQL, e.g. `spark`, `trino`, `presto`.""" -class ViewRepresentation(IcebergBaseModel): - __root__: SQLViewRepresentation +class ViewRepresentation(IcebergBaseModel, RootModel): + root: SQLViewRepresentation class ViewVersion(IcebergBaseModel): @@ -55,7 +55,7 @@ class ViewVersion(IcebergBaseModel): """A list of representations for the view definition""" default_catalog: Optional[str] = Field(alias="default-catalog", default=None) """Catalog name to use when a reference in the SELECT does not contain a catalog""" - default_namespace: Union[str, Identifier] = Field(alias="default-namespace") + default_namespace: Identifier = Field(alias="default-namespace") """Namespace to use when a reference in the SELECT is a single identifier""" diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 35b98734fb..2031839135 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1572,7 +1572,7 @@ def test_create_view( dialect="spark", ) ], - default_namespace="default", + default_namespace=["default"], ) session_catalog.create_view( identifier=identifier, From 02adc6b3ca2c2d39b00897661c717f2d94fdf951 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 13:33:09 -0700 Subject: [PATCH 15/18] lint fixes --- pyiceberg/view/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index 44aedec451..5359ce8957 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -16,7 +16,7 @@ # under the License. from __future__ import annotations -from typing import Dict, List, Literal, Optional, Union +from typing import Dict, List, Literal, Optional from pydantic import Field, RootModel, field_validator From e1f836bcb54efac22f506ae57488aff53006b8c2 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Wed, 2 Jul 2025 17:17:11 -0700 Subject: [PATCH 16/18] address comments --- pyiceberg/typedef.py | 1 + pyiceberg/view/__init__.py | 2 +- pyiceberg/view/metadata.py | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pyiceberg/typedef.py b/pyiceberg/typedef.py index d9ace9d971..a98c0c72bd 100644 --- a/pyiceberg/typedef.py +++ b/pyiceberg/typedef.py @@ -208,3 +208,4 @@ def __hash__(self) -> int: TableVersion: TypeAlias = Literal[1, 2, 3] +ViewVersion: TypeAlias = Literal[1] diff --git a/pyiceberg/view/__init__.py b/pyiceberg/view/__init__.py index d65fc6b114..0d4ebf33a8 100644 --- a/pyiceberg/view/__init__.py +++ b/pyiceberg/view/__init__.py @@ -29,7 +29,7 @@ class View: """An Iceberg view.""" - _identifier: Identifier = Field() + _identifier: Identifier metadata: ViewMetadata def __init__( diff --git a/pyiceberg/view/metadata.py b/pyiceberg/view/metadata.py index 5359ce8957..3061d5c8c6 100644 --- a/pyiceberg/view/metadata.py +++ b/pyiceberg/view/metadata.py @@ -22,6 +22,7 @@ from pyiceberg.schema import Schema from pyiceberg.typedef import IcebergBaseModel, Identifier, Properties +from pyiceberg.typedef import ViewVersion as ViewVersionLiteral from pyiceberg.types import transform_dict_value_to_str @@ -73,7 +74,7 @@ class ViewMetadata(IcebergBaseModel): view_uuid: str = Field(alias="view-uuid") """A UUID that identifies the view, generated when the view is created.""" - format_version: int = Field(alias="format-version", ge=1, le=1) + format_version: ViewVersionLiteral = Field(alias="format-version", ge=1, le=1) """An integer version number for the view format; must be 1""" location: str = Field() """The view's base location; used to create metadata file locations""" From 93ffbec62919b1ddb9422653206df5e697360ac2 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Fri, 4 Jul 2025 10:45:58 -0700 Subject: [PATCH 17/18] lint fix --- pyiceberg/view/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyiceberg/view/__init__.py b/pyiceberg/view/__init__.py index 0d4ebf33a8..4ddb21a112 100644 --- a/pyiceberg/view/__init__.py +++ b/pyiceberg/view/__init__.py @@ -20,8 +20,6 @@ Any, ) -from pydantic import Field - from pyiceberg.typedef import Identifier from pyiceberg.view.metadata import ViewMetadata From 1c7ff03bc5a40cec5ce34350ce61e86008b843b1 Mon Sep 17 00:00:00 2001 From: Alex Stephen Date: Tue, 22 Jul 2025 22:04:57 -0700 Subject: [PATCH 18/18] docs fix --- pyiceberg/catalog/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py index 8752eafdcc..e23d0bdf2e 100644 --- a/pyiceberg/catalog/__init__.py +++ b/pyiceberg/catalog/__init__.py @@ -673,9 +673,8 @@ def create_view( Args: identifier (str | Identifier): View identifier. schema (Schema): View's schema. + view_version (ViewVersion): The format version for the view. location (str | None): Location for the view. Optional Argument. - partition_spec (PartitionSpec): PartitionSpec for the view. - sort_order (SortOrder): SortOrder for the view. properties (Properties): View properties that can be a string based dictionary. Returns: