diff --git a/paimon-python/pypaimon/api/auth/dlf_signer.py b/paimon-python/pypaimon/api/auth/dlf_signer.py index 2574ba74cd68..2a8d4c17b465 100644 --- a/paimon-python/pypaimon/api/auth/dlf_signer.py +++ b/paimon-python/pypaimon/api/auth/dlf_signer.py @@ -18,10 +18,12 @@ import base64 import hashlib import hmac +import threading +import time import uuid from abc import ABC, abstractmethod from collections import OrderedDict -from datetime import datetime +from datetime import datetime, timezone from typing import Dict, Optional from urllib.parse import unquote @@ -320,15 +322,21 @@ def sign_headers( security_token: Optional[str], host: str ) -> Dict[str, str]: + if now is None: + raise ValueError("Parameter 'now' cannot be None") + if host is None: + raise ValueError("Parameter 'host' cannot be None") + headers = {} - # Date header in RFC 1123 format - headers[self.DATE_HEADER] = now.strftime(self.DATE_FORMAT) + if now.tzinfo is None: + gmt_time = now.replace(tzinfo=timezone.utc) + else: + gmt_time = now.astimezone(timezone.utc) + headers[self.DATE_HEADER] = gmt_time.strftime(self.DATE_FORMAT) - # Accept header headers[self.ACCEPT_HEADER] = self.ACCEPT_VALUE - # Content-MD5 (if body exists) if body is not None and body != "": try: headers[self.CONTENT_MD5_HEADER] = self._md5_base64(body) @@ -336,16 +344,15 @@ def sign_headers( except Exception as e: raise RuntimeError(f"Failed to calculate Content-MD5: {e}") - # Host header headers[self.HOST_HEADER] = host - # x-acs-* headers headers[self.X_ACS_SIGNATURE_METHOD] = self.SIGNATURE_METHOD_VALUE - headers[self.X_ACS_SIGNATURE_NONCE] = str(uuid.uuid4()) + + nonce = self._generate_unique_nonce() + headers[self.X_ACS_SIGNATURE_NONCE] = nonce headers[self.X_ACS_SIGNATURE_VERSION] = self.SIGNATURE_VERSION_VALUE headers[self.X_ACS_VERSION] = self.API_VERSION - # Security token (if present) if security_token is not None: headers[self.X_ACS_SECURITY_TOKEN] = security_token @@ -358,22 +365,22 @@ def authorization( host: str, sign_headers: Dict[str, str] ) -> str: + if rest_auth_parameter is None: + raise ValueError("Parameter 'rest_auth_parameter' cannot be None") + if token is None: + raise ValueError("Parameter 'token' cannot be None") + if host is None: + raise ValueError("Parameter 'host' cannot be None") + if sign_headers is None: + raise ValueError("Parameter 'sign_headers' cannot be None") + try: - # Step 1: Build CanonicalizedHeaders (x-acs-* headers, sorted, lowercase) canonicalized_headers = self._build_canonicalized_headers(sign_headers) - - # Step 2: Build CanonicalizedResource (path + sorted query string) canonicalized_resource = self._build_canonicalized_resource(rest_auth_parameter) - - # Step 3: Build StringToSign string_to_sign = self._build_string_to_sign( rest_auth_parameter, sign_headers, canonicalized_headers, canonicalized_resource ) - - # Step 4: Calculate signature signature = self._calculate_signature(string_to_sign, token.access_key_secret) - - # Step 5: Build Authorization header return f"acs {token.access_key_id}:{signature}" except Exception as e: @@ -462,6 +469,15 @@ def _calculate_signature(self, string_to_sign: str, access_key_secret: str) -> s except Exception as e: raise RuntimeError(f"Failed to calculate signature: {e}") + def _generate_unique_nonce(self) -> str: + """Generate unique nonce with UUID, timestamp, and thread ID.""" + unique_nonce = [] + uuid_val = str(uuid.uuid4()) + unique_nonce.append(uuid_val) + unique_nonce.append(str(int(time.time() * 1000))) + unique_nonce.append(str(threading.current_thread().ident)) + return "".join(unique_nonce) + @staticmethod def _md5_base64(data: str) -> str: md5_hash = hashlib.md5(data.encode("utf-8")).digest() diff --git a/paimon-python/pypaimon/tests/rest/dlf_signer_test.py b/paimon-python/pypaimon/tests/rest/dlf_signer_test.py index a2e72f599ee4..ad4e4632f7b4 100644 --- a/paimon-python/pypaimon/tests/rest/dlf_signer_test.py +++ b/paimon-python/pypaimon/tests/rest/dlf_signer_test.py @@ -15,6 +15,8 @@ # limitations under the License. import unittest +import re +import threading from datetime import datetime, timezone from pypaimon.api.auth import ( @@ -150,6 +152,103 @@ def test_parse_signing_algo_from_uri(self): self.assertEqual("default", parse("")) self.assertEqual("default", parse(None)) + def test_openapi_sign_headers_with_enhanced_nonce(self): + """Test enhanced nonce generation.""" + signer = DLFOpenApiSigner() + body = '{"CategoryName":"test","CategoryType":"UNSTRUCTURED"}' + now = datetime(2025, 4, 16, 3, 44, 46, tzinfo=timezone.utc) + host = "dlfnext.cn-beijing.aliyuncs.com" + + headers = signer.sign_headers(body, now, None, host) + + self.assertIsNotNone(headers.get("Date")) + self.assertEqual("application/json", headers.get("Accept")) + self.assertIsNotNone(headers.get("Content-MD5")) + self.assertEqual("application/json", headers.get("Content-Type")) + self.assertEqual(host, headers.get("Host")) + self.assertEqual("HMAC-SHA1", headers.get("x-acs-signature-method")) + + nonce_value = headers.get("x-acs-signature-nonce") + self.assertIsNotNone(nonce_value) + + uuid_pattern = re.compile(r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}') + uuid_match = uuid_pattern.search(nonce_value) + self.assertIsNotNone(uuid_match, f"No UUID pattern found in nonce: {nonce_value}") + + digit_pattern = re.compile(r'\d+') + digit_matches = digit_pattern.findall(nonce_value) + self.assertGreater(len(digit_matches), 0, f"No numeric parts found in nonce: {nonce_value}") + + timestamp_found = any(len(part) >= 10 for part in digit_matches) + self.assertTrue(timestamp_found, f"No timestamp-like part found in nonce: {nonce_value}") + + self.assertEqual("1.0", headers.get("x-acs-signature-version")) + self.assertEqual("2026-01-18", headers.get("x-acs-version")) + + def test_concurrent_nonce_generation(self): + """Test nonce generation thread safety.""" + signer = DLFOpenApiSigner() + body = '{"test":"data"}' + now = datetime.now(timezone.utc) + host = "test-host" + thread_count = 10 + iterations_per_thread = 50 + + nonces = set() + + def worker(): + for _ in range(iterations_per_thread): + headers = signer.sign_headers(body, now, None, host) + nonce = headers.get("x-acs-signature-nonce") + nonces.add(nonce) + + threads = [] + for _ in range(thread_count): + thread = threading.Thread(target=worker) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + expected_total = thread_count * iterations_per_thread + self.assertEqual(expected_total, len(nonces), + f"Expected {expected_total} unique nonces, but got {len(nonces)}. " + f"Possible duplicate nonces generated.") + + def test_parameter_validation(self): + """Test parameter validation.""" + signer = DLFOpenApiSigner() + + with self.assertRaises(ValueError) as context: + signer.sign_headers("body", None, "token", "host") + self.assertIn("'now' cannot be None", str(context.exception)) + + now = datetime.now(timezone.utc) + with self.assertRaises(ValueError) as context: + signer.sign_headers("body", now, "token", None) + self.assertIn("'host' cannot be None", str(context.exception)) + + token = DLFToken("ak", "sk", "token", None) + rest_param = RESTAuthParameter("GET", "/", "", {}) + headers = signer.sign_headers("", now, "", "host") + + with self.assertRaises(ValueError) as context: + signer.authorization(None, token, "host", headers) + self.assertIn("'rest_auth_parameter' cannot be None", str(context.exception)) + + with self.assertRaises(ValueError) as context: + signer.authorization(rest_param, None, "host", headers) + self.assertIn("'token' cannot be None", str(context.exception)) + + with self.assertRaises(ValueError) as context: + signer.authorization(rest_param, token, None, headers) + self.assertIn("'host' cannot be None", str(context.exception)) + + with self.assertRaises(ValueError) as context: + signer.authorization(rest_param, token, "host", None) + self.assertIn("'sign_headers' cannot be None", str(context.exception)) + if __name__ == '__main__': unittest.main()