From 41dba49519bf8e9a29e337c90e20e8b0d28ef1b7 Mon Sep 17 00:00:00 2001 From: Colm Dougan Date: Mon, 23 Jun 2025 23:09:34 +0100 Subject: [PATCH] Added new configuration parameter hive.kerberos-service-name (#2032) --- mkdocs/docs/configuration.md | 9 +++++---- pyiceberg/catalog/hive.py | 14 ++++++++++++-- tests/catalog/test_hive.py | 18 ++++++++++++++++-- 3 files changed, 33 insertions(+), 8 deletions(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 8503a846d6..8c7fa694cb 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -438,10 +438,11 @@ catalog: s3.secret-access-key: password ``` -| Key | Example | Description | -|------------------------------| ------- | --------------------------------- | -| hive.hive2-compatible | true | Using Hive 2.x compatibility mode | -| hive.kerberos-authentication | true | Using authentication via Kerberos | +| Key | Example | Description | +|------------------------------| ------- | ------------------------------------ | +| hive.hive2-compatible | true | Using Hive 2.x compatibility mode | +| hive.kerberos-authentication | true | Using authentication via Kerberos | +| hive.kerberos-service-name | hive | Kerberos service name (default hive) | When using Hive 2.x, make sure to set the compatibility flag: diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 05dd408fc2..09437dd1b6 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -130,6 +130,8 @@ HIVE_KERBEROS_AUTH = "hive.kerberos-authentication" HIVE_KERBEROS_AUTH_DEFAULT = False +HIVE_KERBEROS_SERVICE_NAME = "hive.kerberos-service-name" +HIVE_KERBEROS_SERVICE_NAME_DEFAULT = "hive" LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time" LOCK_CHECK_MAX_WAIT_TIME = "lock-check-max-wait-time" @@ -149,9 +151,16 @@ class _HiveClient: _transport: TTransport _ugi: Optional[List[str]] - def __init__(self, uri: str, ugi: Optional[str] = None, kerberos_auth: Optional[bool] = 
HIVE_KERBEROS_AUTH_DEFAULT): + def __init__( + self, + uri: str, + ugi: Optional[str] = None, + kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT, + kerberos_service_name: Optional[str] = HIVE_KERBEROS_SERVICE_NAME_DEFAULT, + ): self._uri = uri self._kerberos_auth = kerberos_auth + self._kerberos_service_name = kerberos_service_name self._ugi = ugi.split(":") if ugi else None self._transport = self._init_thrift_transport() @@ -161,7 +170,7 @@ def _init_thrift_transport(self) -> TTransport: if not self._kerberos_auth: return TTransport.TBufferedTransport(socket) else: - return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service="hive") + return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service=self._kerberos_service_name) def _client(self) -> Client: protocol = TBinaryProtocol.TBinaryProtocol(self._transport) @@ -314,6 +323,7 @@ def _create_hive_client(properties: Dict[str, str]) -> _HiveClient: uri, properties.get("ugi"), property_as_bool(properties, HIVE_KERBEROS_AUTH, HIVE_KERBEROS_AUTH_DEFAULT), + properties.get(HIVE_KERBEROS_SERVICE_NAME, HIVE_KERBEROS_SERVICE_NAME_DEFAULT), ) except BaseException as e: last_exception = e diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index fef0d6acc6..a36425ebea 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -48,6 +48,7 @@ DO_NOT_UPDATE_STATS, DO_NOT_UPDATE_STATS_DEFAULT, HIVE_KERBEROS_AUTH, + HIVE_KERBEROS_SERVICE_NAME, LOCK_CHECK_MAX_WAIT_TIME, LOCK_CHECK_MIN_WAIT_TIME, LOCK_CHECK_RETRIES, @@ -1300,7 +1301,20 @@ def test_create_hive_client_success() -> None: with patch("pyiceberg.catalog.hive._HiveClient", return_value=MagicMock()) as mock_hive_client: client = HiveCatalog._create_hive_client(properties) - mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", False) + mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", False, "hive") + assert client is not None + + +def 
test_create_hive_client_with_kerberos_success() -> None: + properties = { + "uri": "thrift://localhost:10000", + "ugi": "user", + HIVE_KERBEROS_AUTH: "true", + HIVE_KERBEROS_SERVICE_NAME: "hiveuser", + } + with patch("pyiceberg.catalog.hive._HiveClient", return_value=MagicMock()) as mock_hive_client: + client = HiveCatalog._create_hive_client(properties) + mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", True, "hiveuser") assert client is not None @@ -1313,7 +1327,7 @@ def test_create_hive_client_multiple_uris() -> None: client = HiveCatalog._create_hive_client(properties) assert mock_hive_client.call_count == 2 mock_hive_client.assert_has_calls( - [call("thrift://localhost:10000", "user", False), call("thrift://localhost:10001", "user", False)] + [call("thrift://localhost:10000", "user", False, "hive"), call("thrift://localhost:10001", "user", False, "hive")] ) assert client is not None