From 46eaa77ad578c00e0e53573399d5c440e5eb0da6 Mon Sep 17 00:00:00 2001 From: Sai Shree Pradhan Date: Fri, 23 May 2025 11:23:40 +0530 Subject: [PATCH] Added classes required for telemetry Signed-off-by: Sai Shree Pradhan --- src/databricks/sql/client.py | 7 ++++ .../telemetry/DriverConnectionParameters.py | 41 +++++++++++++++++++ .../sql/telemetry/DriverErrorInfo.py | 23 +++++++++++ .../telemetry/DriverSystemConfiguration.py | 37 +++++++++++++++++ .../sql/telemetry/DriverVolumeOperation.py | 21 ++++++++++ .../sql/telemetry/FrontendLogContext.py | 20 +++++++++ .../sql/telemetry/FrontendLogEntry.py | 11 +++++ src/databricks/sql/telemetry/HostDetails.py | 18 ++++++++ .../sql/telemetry/SqlExecutionEvent.py | 24 +++++++++++ .../sql/telemetry/TelemetryClientContext.py | 18 ++++++++ .../sql/telemetry/TelemetryEvent.py | 25 +++++++++++ .../sql/telemetry/TelemetryFrontendLog.py | 15 +++++++ .../sql/telemetry/TelemetryRequest.py | 13 ++++++ .../sql/telemetry/TelemetryResponse.py | 13 ++++++ .../sql/telemetry/enums/AuthFlow.py | 8 ++++ .../sql/telemetry/enums/AuthMech.py | 7 ++++ .../telemetry/enums/DatabricksClientType.py | 6 +++ .../enums/DriverVolumeOperationType.py | 10 +++++ .../telemetry/enums/ExecutionResultFormat.py | 8 ++++ .../sql/telemetry/enums/StatementType.py | 9 ++++ 20 files changed, 334 insertions(+) create mode 100644 src/databricks/sql/telemetry/DriverConnectionParameters.py create mode 100644 src/databricks/sql/telemetry/DriverErrorInfo.py create mode 100644 src/databricks/sql/telemetry/DriverSystemConfiguration.py create mode 100644 src/databricks/sql/telemetry/DriverVolumeOperation.py create mode 100644 src/databricks/sql/telemetry/FrontendLogContext.py create mode 100644 src/databricks/sql/telemetry/FrontendLogEntry.py create mode 100644 src/databricks/sql/telemetry/HostDetails.py create mode 100644 src/databricks/sql/telemetry/SqlExecutionEvent.py create mode 100644 src/databricks/sql/telemetry/TelemetryClientContext.py create mode 100644 
src/databricks/sql/telemetry/TelemetryEvent.py
 create mode 100644 src/databricks/sql/telemetry/TelemetryFrontendLog.py
 create mode 100644 src/databricks/sql/telemetry/TelemetryRequest.py
 create mode 100644 src/databricks/sql/telemetry/TelemetryResponse.py
 create mode 100644 src/databricks/sql/telemetry/enums/AuthFlow.py
 create mode 100644 src/databricks/sql/telemetry/enums/AuthMech.py
 create mode 100644 src/databricks/sql/telemetry/enums/DatabricksClientType.py
 create mode 100644 src/databricks/sql/telemetry/enums/DriverVolumeOperationType.py
 create mode 100644 src/databricks/sql/telemetry/enums/ExecutionResultFormat.py
 create mode 100644 src/databricks/sql/telemetry/enums/StatementType.py

Review notes (free text placed before the first diff header, outside every hunk):
  * client.py: telemetry_batch_size is read from kwargs but is not used inside
    this hunk, and server_telemetry_enabled is hard-coded to True here.
  * Every dataclass below serializes with json.dumps(asdict(self)), so each
    field value must itself be serializable by the standard json encoder.
  * The trailing "# ..." examples in the new files are Java-style pseudo-code
    kept for illustration; they are not executable Python.
  * DriverSystemConfiguration.py imports __version__ but does not reference it
    in the lines added by this patch.

diff --git a/src/databricks/sql/client.py b/src/databricks/sql/client.py
index 0c9a08a85..3f136adeb 100755
--- a/src/databricks/sql/client.py
+++ b/src/databricks/sql/client.py
@@ -234,6 +234,13 @@ def read(self) -> Optional[OAuthToken]:
             server_hostname, **kwargs
         )
 
+        self.server_telemetry_enabled = True
+        self.client_telemetry_enabled = kwargs.get("enable_telemetry", False)
+        self.telemetry_enabled = (
+            self.client_telemetry_enabled and self.server_telemetry_enabled
+        )
+        telemetry_batch_size = kwargs.get("telemetry_batch_size", 200)
+
         user_agent_entry = kwargs.get("user_agent_entry")
         if user_agent_entry is None:
             user_agent_entry = kwargs.get("_user_agent_entry")
diff --git a/src/databricks/sql/telemetry/DriverConnectionParameters.py b/src/databricks/sql/telemetry/DriverConnectionParameters.py
new file mode 100644
index 000000000..0e10a7434
--- /dev/null
+++ b/src/databricks/sql/telemetry/DriverConnectionParameters.py
@@ -0,0 +1,41 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql.telemetry.HostDetails import HostDetails
+from databricks.sql.telemetry.enums.AuthMech import AuthMech
+from databricks.sql.telemetry.enums.AuthFlow import AuthFlow
+from databricks.sql.telemetry.enums.DatabricksClientType import DatabricksClientType
+
+
+@dataclass
+class DriverConnectionParameters:
+    http_path: str
+    driver_mode: DatabricksClientType
+    host_details: HostDetails
+    auth_mech: AuthMech
+    auth_flow: AuthFlow
+    auth_scope: str
+    discovery_url: str
+    allowed_volume_ingestion_paths: str
+    azure_tenant_id: str
+    socket_timeout: int
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# Part of TelemetryEvent
+# DriverConnectionParameters connectionParams = new DriverConnectionParameters(
+#     httpPath = "/sql/1.0/endpoints/1234567890abcdef",
+#     driverMode = "THRIFT",
+#     hostDetails = new HostDetails(
+#         hostUrl = "https://my-workspace.cloud.databricks.com",
+#         port = 443
+#     ),
+#     authMech = "OAUTH",
+#     authFlow = "AZURE_MANAGED_IDENTITIES",
+#     authScope = "sql",
+#     discoveryUrl = "https://example-url",
+#     allowedVolumeIngestionPaths = "[]",
+#     azureTenantId = "1234567890abcdef",
+#     socketTimeout = 10000
+# )
diff --git a/src/databricks/sql/telemetry/DriverErrorInfo.py b/src/databricks/sql/telemetry/DriverErrorInfo.py
new file mode 100644
index 000000000..f83c73d01
--- /dev/null
+++ b/src/databricks/sql/telemetry/DriverErrorInfo.py
@@ -0,0 +1,23 @@
+import json
+from dataclasses import dataclass, asdict
+
+
+@dataclass
+class DriverErrorInfo:
+    error_name: str
+    stack_trace: str
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# Required for ErrorLogs
+# DriverErrorInfo errorInfo = new DriverErrorInfo(
+#     errorName="CONNECTION_ERROR",
+#     stackTrace="Connection failure while using the Databricks SQL Python connector. Failed to connect to server: https://my-workspace.cloud.databricks.com\n" +
+#         "databricks.sql.exc.OperationalError: Connection refused: connect\n" +
+#         "at databricks.sql.thrift_backend.ThriftBackend.make_request(ThriftBackend.py:329)\n" +
+#         "at databricks.sql.thrift_backend.ThriftBackend.attempt_request(ThriftBackend.py:366)\n" +
+#         "at databricks.sql.thrift_backend.ThriftBackend.open_session(ThriftBackend.py:575)\n" +
+#         "at databricks.sql.client.Connection.__init__(client.py:69)\n" +
+#         "at databricks.sql.client.connect(connection.py:123)")
diff --git a/src/databricks/sql/telemetry/DriverSystemConfiguration.py b/src/databricks/sql/telemetry/DriverSystemConfiguration.py
new file mode 100644
index 000000000..b3af2132d
--- /dev/null
+++ b/src/databricks/sql/telemetry/DriverSystemConfiguration.py
@@ -0,0 +1,37 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql import __version__
+
+
+@dataclass
+class DriverSystemConfiguration:
+    driver_version: str
+    os_name: str
+    os_version: str
+    os_arch: str
+    runtime_name: str
+    runtime_version: str
+    runtime_vendor: str
+    client_app_name: str
+    locale_name: str
+    driver_name: str
+    char_set_encoding: str
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# Part of TelemetryEvent
+# DriverSystemConfiguration systemConfig = new DriverSystemConfiguration(
+#     driver_version = "2.9.3",
+#     os_name = "Darwin",
+#     os_version = "24.4.0",
+#     os_arch = "arm64",
+#     runtime_name = "CPython",
+#     runtime_version = "3.13.3",
+#     runtime_vendor = "cpython",
+#     client_app_name = "databricks-sql-python",
+#     locale_name = "en_US",
+#     driver_name = "databricks-sql-python",
+#     char_set_encoding = "UTF-8"
+# )
diff --git a/src/databricks/sql/telemetry/DriverVolumeOperation.py b/src/databricks/sql/telemetry/DriverVolumeOperation.py
new file mode 100644
index 000000000..9cdaae435
--- /dev/null
+++ b/src/databricks/sql/telemetry/DriverVolumeOperation.py
@@ -0,0 +1,21 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql.telemetry.enums.DriverVolumeOperationType import (
+    DriverVolumeOperationType,
+)
+
+
+@dataclass
+class DriverVolumeOperation:
+    volume_operation_type: DriverVolumeOperationType
+    volume_path: str
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# Part of TelemetryEvent
+# DriverVolumeOperation volumeOperation = new DriverVolumeOperation(
+#     volumeOperationType = "LIST",
+#     volumePath = "/path/to/volume"
+# )
diff --git a/src/databricks/sql/telemetry/FrontendLogContext.py b/src/databricks/sql/telemetry/FrontendLogContext.py
new file mode 100644
index 000000000..55cefb319
--- /dev/null
+++ b/src/databricks/sql/telemetry/FrontendLogContext.py
@@ -0,0 +1,20 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql.telemetry.TelemetryClientContext import TelemetryClientContext
+
+
+@dataclass
+class FrontendLogContext:
+    client_context: TelemetryClientContext
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# used in TelemetryFrontendLog
+# FrontendLogContext frontendLogContext = new FrontendLogContext(
+#     clientContext = new TelemetryClientContext(
+#         timestampMillis = 1716489600000,
+#         userAgent = "databricks-sql-python-test"
+#     )
+# )
diff --git a/src/databricks/sql/telemetry/FrontendLogEntry.py b/src/databricks/sql/telemetry/FrontendLogEntry.py
new file mode 100644
index 000000000..1351ec31a
--- /dev/null
+++ b/src/databricks/sql/telemetry/FrontendLogEntry.py
@@ -0,0 +1,11 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql.telemetry.TelemetryEvent import TelemetryEvent
+
+
+@dataclass
+class FrontendLogEntry:
+    sql_driver_log: TelemetryEvent
+
+    def to_json(self):
+        return json.dumps(asdict(self))
diff --git a/src/databricks/sql/telemetry/HostDetails.py b/src/databricks/sql/telemetry/HostDetails.py
new file mode 100644
index 000000000..e8cfb4c7a
--- /dev/null
+++ b/src/databricks/sql/telemetry/HostDetails.py
@@ -0,0 +1,18 @@
+import json
+from dataclasses import dataclass, asdict
+
+
+@dataclass
+class HostDetails:
+    host_url: str
+    port: int
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# Part of DriverConnectionParameters
+# HostDetails hostDetails = new HostDetails(
+#     hostUrl = "https://my-workspace.cloud.databricks.com",
+#     port = 443
+# )
diff --git a/src/databricks/sql/telemetry/SqlExecutionEvent.py b/src/databricks/sql/telemetry/SqlExecutionEvent.py
new file mode 100644
index 000000000..ca33488c0
--- /dev/null
+++ b/src/databricks/sql/telemetry/SqlExecutionEvent.py
@@ -0,0 +1,24 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql.telemetry.enums.StatementType import StatementType
+from databricks.sql.telemetry.enums.ExecutionResultFormat import ExecutionResultFormat
+
+
+@dataclass
+class SqlExecutionEvent:
+    statement_type: StatementType
+    is_compressed: bool
+    execution_result: ExecutionResultFormat
+    retry_count: int
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# Part of TelemetryEvent
+# SqlExecutionEvent sqlExecutionEvent = new SqlExecutionEvent(
+#     statementType = "QUERY",
+#     isCompressed = true,
+#     executionResult = "INLINE_ARROW",
+#     retryCount = 0
+# )
diff --git a/src/databricks/sql/telemetry/TelemetryClientContext.py b/src/databricks/sql/telemetry/TelemetryClientContext.py
new file mode 100644
index 000000000..896b347f5
--- /dev/null
+++ b/src/databricks/sql/telemetry/TelemetryClientContext.py
@@ -0,0 +1,18 @@
+from dataclasses import dataclass, asdict
+import json
+
+
+@dataclass
+class TelemetryClientContext:
+    timestamp_millis: int
+    user_agent: str
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+# used in FrontendLogContext
+# TelemetryClientContext clientContext = new TelemetryClientContext(
+#     timestampMillis = 1716489600000,
+#     userAgent = "databricks-sql-python-test"
+# )
diff --git a/src/databricks/sql/telemetry/TelemetryEvent.py b/src/databricks/sql/telemetry/TelemetryEvent.py
new file mode 100644
index 000000000..fcbb2b77d
--- /dev/null
+++ b/src/databricks/sql/telemetry/TelemetryEvent.py
@@ -0,0 +1,25 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql.telemetry.DriverSystemConfiguration import DriverSystemConfiguration
+from databricks.sql.telemetry.DriverConnectionParameters import (
+    DriverConnectionParameters,
+)
+from databricks.sql.telemetry.DriverVolumeOperation import DriverVolumeOperation
+from databricks.sql.telemetry.SqlExecutionEvent import SqlExecutionEvent
+from databricks.sql.telemetry.DriverErrorInfo import DriverErrorInfo
+
+
+@dataclass
+class TelemetryEvent:
+    session_id: str
+    sql_statement_id: str
+    system_configuration: DriverSystemConfiguration
+    driver_connection_params: DriverConnectionParameters
+    auth_type: str
+    vol_operation: DriverVolumeOperation
+    sql_operation: SqlExecutionEvent
+    error_info: DriverErrorInfo
+    latency: int
+
+    def to_json(self):
+        return json.dumps(asdict(self))
diff --git a/src/databricks/sql/telemetry/TelemetryFrontendLog.py b/src/databricks/sql/telemetry/TelemetryFrontendLog.py
new file mode 100644
index 000000000..abbc1120e
--- /dev/null
+++ b/src/databricks/sql/telemetry/TelemetryFrontendLog.py
@@ -0,0 +1,15 @@
+import json
+from dataclasses import dataclass, asdict
+from databricks.sql.telemetry.FrontendLogContext import FrontendLogContext
+from databricks.sql.telemetry.FrontendLogEntry import FrontendLogEntry
+
+
+@dataclass
+class TelemetryFrontendLog:
+    workspace_id: int
+    frontend_log_event_id: str
+    context: FrontendLogContext
+    entry: FrontendLogEntry
+
+    def to_json(self):
+        return json.dumps(asdict(self))
diff --git a/src/databricks/sql/telemetry/TelemetryRequest.py b/src/databricks/sql/telemetry/TelemetryRequest.py
new file mode 100644
index 000000000..8142e1182
--- /dev/null
+++ b/src/databricks/sql/telemetry/TelemetryRequest.py
@@ -0,0 +1,13 @@
+import json
+from dataclasses import dataclass, asdict
+from typing import List, Optional
+
+
+@dataclass
+class TelemetryRequest:
+    uploadTime: int
+    items: List[str]
+    protoLogs: Optional[List[str]]
+
+    def to_json(self):
+        return json.dumps(asdict(self))
diff --git a/src/databricks/sql/telemetry/TelemetryResponse.py b/src/databricks/sql/telemetry/TelemetryResponse.py
new file mode 100644
index 000000000..3b14050d1
--- /dev/null
+++ b/src/databricks/sql/telemetry/TelemetryResponse.py
@@ -0,0 +1,13 @@
+import json
+from dataclasses import dataclass, asdict
+from typing import List, Optional
+
+
+@dataclass
+class TelemetryResponse:
+    errors: List[str]
+    numSuccess: int
+    numProtoSuccess: int
+
+    def to_json(self):
+        return json.dumps(asdict(self))
diff --git a/src/databricks/sql/telemetry/enums/AuthFlow.py b/src/databricks/sql/telemetry/enums/AuthFlow.py
new file mode 100644
index 000000000..2afc35c74
--- /dev/null
+++ b/src/databricks/sql/telemetry/enums/AuthFlow.py
@@ -0,0 +1,8 @@
+from enum import Enum
+
+
+class AuthFlow(str, Enum):  # str mixin: JSON-serializable members
+    TOKEN_PASSTHROUGH = "token_passthrough"
+    CLIENT_CREDENTIALS = "client_credentials"
+    BROWSER_BASED_AUTHENTICATION = "browser_based_authentication"
+    AZURE_MANAGED_IDENTITIES = "azure_managed_identities"
diff --git a/src/databricks/sql/telemetry/enums/AuthMech.py b/src/databricks/sql/telemetry/enums/AuthMech.py
new file mode 100644
index 000000000..6425eea47
--- /dev/null
+++ b/src/databricks/sql/telemetry/enums/AuthMech.py
@@ -0,0 +1,7 @@
+from enum import Enum
+
+
+class AuthMech(str, Enum):  # str mixin: JSON-serializable members
+    OTHER = "other"
+    PAT = "pat"
+    OAUTH = "oauth"
diff --git a/src/databricks/sql/telemetry/enums/DatabricksClientType.py b/src/databricks/sql/telemetry/enums/DatabricksClientType.py
new file mode 100644
index 000000000..8e08c355f
--- /dev/null
+++ b/src/databricks/sql/telemetry/enums/DatabricksClientType.py
@@ -0,0 +1,6 @@
+from enum import Enum
+
+
+class DatabricksClientType(str, Enum):  # str mixin: JSON-serializable members
+    SEA = "SEA"
+    THRIFT = "THRIFT"
diff --git a/src/databricks/sql/telemetry/enums/DriverVolumeOperationType.py b/src/databricks/sql/telemetry/enums/DriverVolumeOperationType.py
new file mode 100644
index 000000000..581e56c25
--- /dev/null
+++ b/src/databricks/sql/telemetry/enums/DriverVolumeOperationType.py
@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class DriverVolumeOperationType(str, Enum):  # str mixin: JSON-serializable members
+    TYPE_UNSPECIFIED = "type_unspecified"
+    PUT = "put"
+    GET = "get"
+    DELETE = "delete"
+    LIST = "list"
+    QUERY = "query"
diff --git a/src/databricks/sql/telemetry/enums/ExecutionResultFormat.py b/src/databricks/sql/telemetry/enums/ExecutionResultFormat.py
new file mode 100644
index 000000000..23e18150e
--- /dev/null
+++ b/src/databricks/sql/telemetry/enums/ExecutionResultFormat.py
@@ -0,0 +1,8 @@
+from enum import Enum
+
+
+class ExecutionResultFormat(str, Enum):  # str mixin: JSON-serializable members
+    FORMAT_UNSPECIFIED = "format_unspecified"
+    INLINE_ARROW = "inline_arrow"
+    EXTERNAL_LINKS = "external_links"
+    COLUMNAR_INLINE = "columnar_inline"
diff --git a/src/databricks/sql/telemetry/enums/StatementType.py b/src/databricks/sql/telemetry/enums/StatementType.py
new file mode 100644
index 000000000..cea86bab8
--- /dev/null
+++ b/src/databricks/sql/telemetry/enums/StatementType.py
@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class StatementType(str, Enum):  # str mixin: JSON-serializable members
+    NONE = "none"
+    QUERY = "query"
+    SQL = "sql"
+    UPDATE = "update"
+    METADATA = "metadata"