From 4b9ce05520ad5dac8de6104faa28666190957294 Mon Sep 17 00:00:00 2001 From: "R. Conner Howell" Date: Tue, 15 Apr 2025 15:07:24 -0700 Subject: [PATCH 1/2] Adds support for creating a GlueCatalog with own client --- pyiceberg/catalog/glue.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index 4f0a9061df..a71b6d6e71 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -303,11 +303,17 @@ def add_glue_catalog_id(params: Dict[str, str], **kwargs: Any) -> None: class GlueCatalog(MetastoreCatalog): - def __init__(self, name: str, **properties: Any): + glue: GlueClient + + def __init__(self, name: str, client: GlueClient | None = None, **properties: Any): super().__init__(name, **properties) retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE) + if client: + self.glue = client + return + session = boto3.Session( profile_name=properties.get(GLUE_PROFILE_NAME), region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), From b0fd88b3dfbd7ccc82567c363f68410d003d3601 Mon Sep 17 00:00:00 2001 From: "R. Conner Howell" Date: Fri, 18 Apr 2025 09:06:55 -0700 Subject: [PATCH 2/2] Adds doc comment and refactor if..else block --- pyiceberg/catalog/glue.py | 58 ++++++++++++++++++++++---------------- tests/catalog/test_glue.py | 8 ++++++ 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py index a71b6d6e71..01bb0e9b05 100644 --- a/pyiceberg/catalog/glue.py +++ b/pyiceberg/catalog/glue.py @@ -305,36 +305,44 @@ def add_glue_catalog_id(params: Dict[str, str], **kwargs: Any) -> None: class GlueCatalog(MetastoreCatalog): glue: GlueClient - def __init__(self, name: str, client: GlueClient | None = None, **properties: Any): - super().__init__(name, **properties) + def __init__(self, name: str, client: Optional[GlueClient] = None, **properties: Any): + """Glue Catalog. + + You either need to provide a boto3 glue client, or one will be constructed from the properties. - retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE) + Args: + name: Name to identify the catalog. + client: An optional boto3 glue client. + properties: Properties for glue client construction and configuration. + """ + super().__init__(name, **properties) if client: self.glue = client - return - - session = boto3.Session( - profile_name=properties.get(GLUE_PROFILE_NAME), - region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), - botocore_session=properties.get(BOTOCORE_SESSION), - aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), - aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), - aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), - ) - self.glue: GlueClient = session.client( - "glue", - endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT), - config=Config( - retries={ - "max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES), - "mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE, - } - ), - ) + else: + retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE) + + session = boto3.Session( + profile_name=properties.get(GLUE_PROFILE_NAME), + region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION), + botocore_session=properties.get(BOTOCORE_SESSION), + aws_access_key_id=get_first_property_value(properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), + aws_secret_access_key=get_first_property_value(properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), + aws_session_token=get_first_property_value(properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN), + ) + self.glue: GlueClient = session.client( + "glue", + endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT), + config=Config( + retries={ + "max_attempts": properties.get(GLUE_MAX_RETRIES, MAX_RETRIES), + "mode": retry_mode_prop_value if retry_mode_prop_value in EXISTING_RETRY_MODES else STANDARD_RETRY_MODE, + } + ), + ) - if glue_catalog_id := properties.get(GLUE_ID): - _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id) + if glue_catalog_id := properties.get(GLUE_ID): + _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id) def _convert_glue_to_iceberg(self, glue_table: TableTypeDef) -> Table: properties: Properties = glue_table["Parameters"] diff --git a/tests/catalog/test_glue.py b/tests/catalog/test_glue.py index eabbffb378..df1734eb68 100644 --- a/tests/catalog/test_glue.py +++ b/tests/catalog/test_glue.py @@ -932,3 +932,11 @@ def test_glue_endpoint_override(_bucket_initialize: None, moto_endpoint_url: str catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}", "glue.endpoint": test_endpoint} ) assert test_catalog.glue.meta.endpoint_url == test_endpoint + + +@mock_aws +def test_glue_client_override() -> None: + catalog_name = "glue" + test_client = boto3.client("glue", region_name="us-west-2") + test_catalog = GlueCatalog(catalog_name, test_client) + assert test_catalog.glue is test_client