Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -438,10 +438,11 @@ catalog:
s3.secret-access-key: password
```

| Key | Example | Description |
|------------------------------| ------- | --------------------------------- |
| hive.hive2-compatible | true | Using Hive 2.x compatibility mode |
| hive.kerberos-authentication | true | Using authentication via Kerberos |
| Key | Example | Description |
|------------------------------| ------- | ------------------------------------ |
| hive.hive2-compatible | true | Using Hive 2.x compatibility mode |
| hive.kerberos-authentication | true | Using authentication via Kerberos |
| hive.kerberos-service-name | hive | Kerberos service name (default hive) |

When using Hive 2.x, make sure to set the compatibility flag:

Expand Down
14 changes: 12 additions & 2 deletions pyiceberg/catalog/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@

# Catalog property key that switches the Hive client to Kerberos authentication.
HIVE_KERBEROS_AUTH = "hive.kerberos-authentication"
# Value used when the Kerberos authentication property is not set.
HIVE_KERBEROS_AUTH_DEFAULT = False
# Catalog property key holding the Kerberos service name handed to the SASL transport.
HIVE_KERBEROS_SERVICE_NAME = "hive.kerberos-service-name"
# Service name used when the Kerberos service-name property is not set.
HIVE_KERBEROS_SERVICE_NAME_DEFAULT = "hive"

LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time"
LOCK_CHECK_MAX_WAIT_TIME = "lock-check-max-wait-time"
Expand All @@ -149,9 +151,16 @@ class _HiveClient:
_transport: TTransport
_ugi: Optional[List[str]]

def __init__(self, uri: str, ugi: Optional[str] = None, kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT):
def __init__(
    self,
    uri: str,
    ugi: Optional[str] = None,
    kerberos_auth: Optional[bool] = HIVE_KERBEROS_AUTH_DEFAULT,
    kerberos_service_name: Optional[str] = HIVE_KERBEROS_SERVICE_NAME_DEFAULT,
):
    """Store the connection settings and build the Thrift transport.

    Args:
        uri: URI of the Hive metastore to connect to.
        ugi: Optional colon-separated string; it is split on ":" and kept as a
            list, or stored as None when empty or missing.
        kerberos_auth: When truthy, the transport is created as a SASL client
            transport instead of a plain buffered transport.
        kerberos_service_name: Service name passed to the SASL client
            transport. Defaults to HIVE_KERBEROS_SERVICE_NAME_DEFAULT; the
            previous default was the property *key*, not a service name.
    """
    self._uri = uri
    self._kerberos_auth = kerberos_auth
    self._kerberos_service_name = kerberos_service_name
    self._ugi = ugi.split(":") if ugi else None
    # Must run last: _init_thrift_transport reads the Kerberos attributes set above.
    self._transport = self._init_thrift_transport()

Expand All @@ -161,7 +170,7 @@ def _init_thrift_transport(self) -> TTransport:
if not self._kerberos_auth:
return TTransport.TBufferedTransport(socket)
else:
return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service="hive")
return TTransport.TSaslClientTransport(socket, host=url_parts.hostname, service=self._kerberos_service_name)

def _client(self) -> Client:
protocol = TBinaryProtocol.TBinaryProtocol(self._transport)
Expand Down Expand Up @@ -314,6 +323,7 @@ def _create_hive_client(properties: Dict[str, str]) -> _HiveClient:
uri,
properties.get("ugi"),
property_as_bool(properties, HIVE_KERBEROS_AUTH, HIVE_KERBEROS_AUTH_DEFAULT),
properties.get(HIVE_KERBEROS_SERVICE_NAME, HIVE_KERBEROS_SERVICE_NAME_DEFAULT),
)
except BaseException as e:
last_exception = e
Expand Down
18 changes: 16 additions & 2 deletions tests/catalog/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
DO_NOT_UPDATE_STATS,
DO_NOT_UPDATE_STATS_DEFAULT,
HIVE_KERBEROS_AUTH,
HIVE_KERBEROS_SERVICE_NAME,
LOCK_CHECK_MAX_WAIT_TIME,
LOCK_CHECK_MIN_WAIT_TIME,
LOCK_CHECK_RETRIES,
Expand Down Expand Up @@ -1300,7 +1301,20 @@ def test_create_hive_client_success() -> None:

with patch("pyiceberg.catalog.hive._HiveClient", return_value=MagicMock()) as mock_hive_client:
client = HiveCatalog._create_hive_client(properties)
mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", False)
mock_hive_client.assert_called_once_with("thrift://localhost:10000", "user", False, "hive")
assert client is not None


def test_create_hive_client_with_kerberos_success() -> None:
    """Check that the Kerberos flag and service name are forwarded to _HiveClient."""
    properties = {"uri": "thrift://localhost:10000", "ugi": "user"}
    properties[HIVE_KERBEROS_AUTH] = "true"
    properties[HIVE_KERBEROS_SERVICE_NAME] = "hiveuser"

    # The string flag is expected to arrive as a bool, the service name unchanged.
    expected_args = ("thrift://localhost:10000", "user", True, "hiveuser")

    with patch("pyiceberg.catalog.hive._HiveClient", return_value=MagicMock()) as hive_client_cls:
        client = HiveCatalog._create_hive_client(properties)
        hive_client_cls.assert_called_once_with(*expected_args)
        assert client is not None


Expand All @@ -1313,7 +1327,7 @@ def test_create_hive_client_multiple_uris() -> None:
client = HiveCatalog._create_hive_client(properties)
assert mock_hive_client.call_count == 2
mock_hive_client.assert_has_calls(
[call("thrift://localhost:10000", "user", False), call("thrift://localhost:10001", "user", False)]
[call("thrift://localhost:10000", "user", False, "hive"), call("thrift://localhost:10001", "user", False, "hive")]
)
assert client is not None

Expand Down