Skip to content

Commit 8ff6552

Browse files
committed
Merge remote-tracking branch 'target/main' into close-conn
2 parents d90ac80 + bcab1df commit 8ff6552

32 files changed

+2067
-184
lines changed

.github/workflows/code-quality-checks.yml

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ jobs:
88
strategy:
99
matrix:
1010
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
11+
dependency-version: ["default", "min"]
12+
# Optimize matrix - test "min" dependency versions only on a subset of Python versions (3.9-3.11)
13+
exclude:
14+
- python-version: "3.12"
15+
dependency-version: "min"
16+
- python-version: "3.13"
17+
dependency-version: "min"
18+
19+
name: "Unit Tests (Python ${{ matrix.python-version }}, ${{ matrix.dependency-version }} deps)"
20+
1121
steps:
1222
#----------------------------------------------
1323
# check-out repo and set-up python
@@ -37,7 +47,7 @@ jobs:
3747
uses: actions/cache@v4
3848
with:
3949
path: .venv
40-
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }}
50+
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ matrix.dependency-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }}
4151
#----------------------------------------------
4252
# install dependencies if cache does not exist
4353
#----------------------------------------------
@@ -50,15 +60,47 @@ jobs:
5060
- name: Install library
5161
run: poetry install --no-interaction
5262
#----------------------------------------------
63+
# override with custom dependency versions
64+
#----------------------------------------------
65+
- name: Install Python tools for custom versions
66+
if: matrix.dependency-version != 'default'
67+
run: poetry run pip install toml packaging
68+
69+
- name: Generate requirements file
70+
if: matrix.dependency-version != 'default'
71+
run: |
72+
poetry run python scripts/dependency_manager.py ${{ matrix.dependency-version }} --output requirements-${{ matrix.dependency-version }}.txt
73+
echo "Generated requirements for ${{ matrix.dependency-version }} versions:"
74+
cat requirements-${{ matrix.dependency-version }}.txt
75+
76+
- name: Override with custom dependency versions
77+
if: matrix.dependency-version != 'default'
78+
run: poetry run pip install -r requirements-${{ matrix.dependency-version }}.txt
79+
80+
#----------------------------------------------
5381
# run test suite
5482
#----------------------------------------------
83+
- name: Show installed versions
84+
run: |
85+
echo "=== Dependency Version: ${{ matrix.dependency-version }} ==="
86+
poetry run pip list
87+
5588
- name: Run tests
5689
run: poetry run python -m pytest tests/unit
5790
run-unit-tests-with-arrow:
5891
runs-on: ubuntu-latest
5992
strategy:
6093
matrix:
6194
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
95+
dependency-version: ["default", "min"]
96+
exclude:
97+
- python-version: "3.12"
98+
dependency-version: "min"
99+
- python-version: "3.13"
100+
dependency-version: "min"
101+
102+
name: "Unit Tests + PyArrow (Python ${{ matrix.python-version }}, ${{ matrix.dependency-version }} deps)"
103+
62104
steps:
63105
#----------------------------------------------
64106
# check-out repo and set-up python
@@ -88,7 +130,7 @@ jobs:
88130
uses: actions/cache@v4
89131
with:
90132
path: .venv-pyarrow
91-
key: venv-pyarrow-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }}
133+
key: venv-pyarrow-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ matrix.dependency-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }}
92134
#----------------------------------------------
93135
# install dependencies if cache does not exist
94136
#----------------------------------------------
@@ -101,8 +143,30 @@ jobs:
101143
- name: Install library
102144
run: poetry install --no-interaction --all-extras
103145
#----------------------------------------------
146+
# override with custom dependency versions
147+
#----------------------------------------------
148+
- name: Install Python tools for custom versions
149+
if: matrix.dependency-version != 'default'
150+
run: poetry run pip install toml packaging
151+
152+
- name: Generate requirements file with pyarrow
153+
if: matrix.dependency-version != 'default'
154+
run: |
155+
poetry run python scripts/dependency_manager.py ${{ matrix.dependency-version }} --output requirements-${{ matrix.dependency-version }}-arrow.txt
156+
echo "Generated requirements for ${{ matrix.dependency-version }} versions with PyArrow:"
157+
cat requirements-${{ matrix.dependency-version }}-arrow.txt
158+
159+
- name: Override with custom dependency versions
160+
if: matrix.dependency-version != 'default'
161+
run: poetry run pip install -r requirements-${{ matrix.dependency-version }}-arrow.txt
162+
#----------------------------------------------
104163
# run test suite
105164
#----------------------------------------------
165+
- name: Show installed versions
166+
run: |
167+
echo "=== Dependency Version: ${{ matrix.dependency-version }} with PyArrow ==="
168+
poetry run pip list
169+
106170
- name: Run tests
107171
run: poetry run python -m pytest tests/unit
108172
check-linting:

CHANGELOG.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,37 @@
11
# Release History
22

3+
# 4.1.2 (2025-08-22)
4+
- Streaming ingestion support for PUT operation (databricks/databricks-sql-python#643 by @sreekanth-db)
5+
- Removed use_threads argument on concat_tables for compatibility with pyarrow<14 (databricks/databricks-sql-python#684 by @jprakash-db)
6+
7+
# 4.1.1 (2025-08-21)
8+
- Add documentation for proxy support (databricks/databricks-sql-python#680 by @vikrantpuppala)
9+
- Fix compatibility with urllib3<2 and add CI actions to improve dependency checks (databricks/databricks-sql-python#678 by @vikrantpuppala)
10+
11+
# 4.1.0 (2025-08-18)
12+
- Removed Codeowners (databricks/databricks-sql-python#623 by @jprakash-db)
13+
- Azure Service Principal Credential Provider (databricks/databricks-sql-python#621 by @jprakash-db)
14+
- Add optional telemetry support to the python connector (databricks/databricks-sql-python#628 by @saishreeeee)
15+
- Fix potential resource leak in `CloudFetchQueue` (databricks/databricks-sql-python#624 by @varun-edachali-dbx)
16+
- Generalise Backend Layer (databricks/databricks-sql-python#604 by @varun-edachali-dbx)
17+
- Arrow performance optimizations (databricks/databricks-sql-python#638 by @jprakash-db)
18+
- Connection errors to unauthenticated telemetry endpoint (databricks/databricks-sql-python#619 by @saishreeeee)
19+
- SEA: Execution Phase (databricks/databricks-sql-python#645 by @varun-edachali-dbx)
20+
- Add retry mechanism to telemetry requests (databricks/databricks-sql-python#617 by @saishreeeee)
21+
- SEA: Fetch Phase (databricks/databricks-sql-python#650 by @varun-edachali-dbx)
22+
- Added logs for cloud fetch speed (databricks/databricks-sql-python#654 by @shivam2680)
23+
- Make telemetry batch size configurable and add time-based flush (databricks/databricks-sql-python#622 by @saishreeeee)
24+
- Normalise type code (databricks/databricks-sql-python#652 by @varun-edachali-dbx)
25+
- Testing for telemetry (databricks/databricks-sql-python#616 by @saishreeeee)
26+
- Bug fixes in telemetry (databricks/databricks-sql-python#659 by @saishreeeee)
27+
- Telemetry server-side flag integration (databricks/databricks-sql-python#646 by @saishreeeee)
28+
- Enhance SEA HTTP Client (databricks/databricks-sql-python#618 by @varun-edachali-dbx)
29+
- SEA: Allow large metadata responses (databricks/databricks-sql-python#653 by @varun-edachali-dbx)
30+
- Added code coverage workflow to test the code coverage from unit and e2e tests (databricks/databricks-sql-python#657 by @msrathore-db)
31+
- Concat tables to be backward compatible (databricks/databricks-sql-python#647 by @jprakash-db)
32+
- Refactor codebase to use a unified http client (databricks/databricks-sql-python#673 by @vikrantpuppala)
33+
- Add kerberos support for proxy auth (databricks/databricks-sql-python#675 by @vikrantpuppala)
34+
335
# 4.0.5 (2025-06-24)
436
- Fix: Reverted change in cursor close handling which led to errors impacting users (databricks/databricks-sql-python#613 by @madhav-db)
537

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ The Databricks SQL Connector for Python allows you to develop Python application
77

88
This connector uses Arrow as the data-exchange format, and supports APIs (e.g. `fetchmany_arrow`) to directly fetch Arrow tables. Arrow tables are wrapped in the `ArrowQueue` class to provide a natural API to get several rows at a time. [PyArrow](https://arrow.apache.org/docs/python/index.html) is required to enable this and use these APIs; you can install it via `pip install pyarrow` or `pip install databricks-sql-connector[pyarrow]`.
99

10+
The connector includes built-in support for HTTP/HTTPS proxy servers with multiple authentication methods including basic authentication and Kerberos/Negotiate authentication. See `docs/proxy.md` and `examples/proxy_authentication.py` for details.
11+
1012
You are welcome to file an issue here for general use cases. You can also contact Databricks Support [here](https://help.databricks.com).
1113

1214
## Requirements

docs/proxy.md

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
# Proxy Support
2+
3+
The Databricks SQL Connector supports connecting through HTTP and HTTPS proxy servers with various authentication methods. This feature automatically detects system proxy configuration and handles proxy authentication transparently.
4+
5+
## Quick Start
6+
7+
The connector automatically uses your system's proxy configuration when available:
8+
9+
```python
10+
from databricks import sql
11+
12+
# Basic connection - uses system proxy automatically
13+
with sql.connect(
14+
server_hostname="your-workspace.cloud.databricks.com",
15+
http_path="/sql/1.0/endpoints/your-endpoint-id",
16+
access_token="your-token"
17+
) as connection:
18+
# Your queries here...
19+
```
20+
21+
For advanced proxy authentication (like Kerberos), specify the authentication method:
22+
23+
```python
24+
with sql.connect(
25+
server_hostname="your-workspace.cloud.databricks.com",
26+
http_path="/sql/1.0/endpoints/your-endpoint-id",
27+
access_token="your-token",
28+
_proxy_auth_method="negotiate" # Enable Kerberos proxy auth
29+
) as connection:
30+
# Your queries here...
31+
```
32+
33+
## Proxy Configuration
34+
35+
### Environment Variables
36+
37+
The connector follows standard proxy environment variable conventions:
38+
39+
| Variable | Description | Example |
40+
|----------|-------------|---------|
41+
| `HTTP_PROXY` | Proxy for HTTP requests | `http://proxy.company.com:8080` |
42+
| `HTTPS_PROXY` | Proxy for HTTPS requests | `https://proxy.company.com:8080` |
43+
| `NO_PROXY` | Hosts to bypass proxy | `localhost,127.0.0.1,.company.com` |
44+
45+
**Note**: The connector also recognizes lowercase versions (`http_proxy`, `https_proxy`, `no_proxy`).
46+
47+
### Proxy URL Formats
48+
49+
Basic proxy (no authentication):
50+
```bash
51+
export HTTPS_PROXY="http://proxy.company.com:8080"
52+
```
53+
54+
Proxy with basic authentication:
55+
```bash
56+
export HTTPS_PROXY="http://username:password@proxy.company.com:8080"
57+
```
58+
59+
## Authentication Methods
60+
61+
The connector supports multiple proxy authentication methods via the `_proxy_auth_method` parameter:
62+
63+
### 1. Basic Authentication (`basic` or `None`)
64+
65+
**Default behavior** when credentials are provided in the proxy URL or when `_proxy_auth_method="basic"` is specified.
66+
67+
```python
68+
# Method 1: Credentials in proxy URL (recommended)
69+
# Set environment: HTTPS_PROXY="http://user:pass@proxy.company.com:8080"
70+
with sql.connect(
71+
server_hostname="your-workspace.com",
72+
http_path="/sql/1.0/endpoints/abc123",
73+
access_token="your-token"
74+
# No _proxy_auth_method needed - detected automatically
75+
) as conn:
76+
pass
77+
78+
# Method 2: Explicit basic authentication
79+
with sql.connect(
80+
server_hostname="your-workspace.com",
81+
http_path="/sql/1.0/endpoints/abc123",
82+
access_token="your-token",
83+
_proxy_auth_method="basic" # Explicit basic auth
84+
) as conn:
85+
pass
86+
```
87+
88+
### 2. Kerberos/Negotiate Authentication (`negotiate`)
89+
90+
For corporate environments using Kerberos authentication with proxy servers.
91+
92+
**Prerequisites:**
93+
- Valid Kerberos tickets (run `kinit` first)
94+
- Properly configured Kerberos environment
95+
96+
```python
97+
with sql.connect(
98+
server_hostname="your-workspace.com",
99+
http_path="/sql/1.0/endpoints/abc123",
100+
access_token="your-token",
101+
_proxy_auth_method="negotiate" # Enable Kerberos proxy auth
102+
) as conn:
103+
pass
104+
```
105+
106+
**Kerberos Setup Example:**
107+
```bash
108+
# Obtain Kerberos tickets
109+
kinit your-username@YOUR-DOMAIN.COM
110+
111+
# Set proxy (no credentials in URL for Kerberos)
112+
export HTTPS_PROXY="http://proxy.company.com:8080"
113+
114+
# Run your Python script
115+
python your_script.py
116+
```
117+
118+
## Proxy Bypass
119+
120+
The connector respects system proxy bypass rules. Requests to hosts listed in `NO_PROXY` or system bypass lists will connect directly, bypassing the proxy.
121+
122+
```bash
123+
# Bypass proxy for local and internal hosts
124+
export NO_PROXY="localhost,127.0.0.1,.internal.company.com"
125+
```
126+
127+
## Advanced Configuration
128+
129+
### Per-Request Proxy Decisions
130+
131+
The connector automatically makes per-request decisions about proxy usage based on:
132+
133+
1. **System proxy configuration** - Detected from environment variables
134+
2. **Proxy bypass rules** - Honor `NO_PROXY` and system bypass settings
135+
3. **Target host** - Check if the specific host should use proxy
136+
137+
### Connection Pooling
138+
139+
The connector maintains separate connection pools for direct and proxy connections, allowing efficient handling of mixed proxy/direct traffic.
140+
141+
### SSL/TLS with Proxy
142+
143+
HTTPS connections through HTTP proxies use the CONNECT method for SSL tunneling. The connector handles this automatically while preserving all SSL verification settings.
144+
145+
## Troubleshooting
146+
147+
### Common Issues
148+
149+
**Problem**: Connection fails with proxy-related errors
150+
```
151+
Solution:
152+
1. Verify proxy environment variables are set correctly
153+
2. Check if proxy requires authentication
154+
3. Ensure proxy allows CONNECT method for HTTPS
155+
4. Test proxy connectivity with curl:
156+
curl -x $HTTPS_PROXY https://your-workspace.com
157+
```
158+
159+
**Problem**: Kerberos authentication fails
160+
```
161+
Solution:
162+
1. Verify Kerberos tickets: klist
163+
2. Renew tickets if expired: kinit
164+
3. Check proxy supports negotiate authentication
165+
4. Ensure time synchronization between client and KDC
166+
```
167+
168+
**Problem**: Some requests bypass proxy unexpectedly
169+
```
170+
Solution:
171+
1. Check NO_PROXY environment variable
172+
2. Review system proxy bypass settings
173+
3. Verify the target hostname format
174+
```
175+
176+
### Debug Logging
177+
178+
Enable detailed logging to troubleshoot proxy issues:
179+
180+
```python
181+
import logging
182+
183+
# Enable connector debug logging
184+
logging.basicConfig(level=logging.DEBUG)
185+
logging.getLogger("databricks.sql").setLevel(logging.DEBUG)
186+
187+
# Enable urllib3 logging for HTTP details
188+
logging.getLogger("urllib3").setLevel(logging.DEBUG)
189+
```
190+
191+
### Testing Proxy Configuration
192+
193+
Use the provided example script to test different proxy authentication methods:
194+
195+
```bash
196+
cd examples/
197+
python proxy_authentication.py
198+
```
199+
200+
This script tests:
201+
- Default proxy behavior
202+
- Basic authentication
203+
- Kerberos/Negotiate authentication
204+
205+
## Examples
206+
207+
See `examples/proxy_authentication.py` for a comprehensive demonstration of proxy authentication methods.
208+
209+
## Implementation Details
210+
211+
### How Proxy Detection Works
212+
213+
1. **Environment Variables**: Check `HTTP_PROXY`/`HTTPS_PROXY` environment variables
214+
2. **System Configuration**: Use Python's `urllib.request.getproxies()` to detect system settings
215+
3. **Bypass Rules**: Honor `NO_PROXY` and `urllib.request.proxy_bypass()` rules
216+
4. **Per-Request Logic**: Decide proxy usage for each request based on target host
217+
218+
### Supported Proxy Types
219+
220+
- **HTTP Proxies**: For both HTTP and HTTPS traffic (via CONNECT)
221+
- **HTTPS Proxies**: Encrypted proxy connections
222+
- **Authentication**: Basic, Negotiate/Kerberos
223+
- **Bypass Rules**: Full support for NO_PROXY patterns
224+
225+
### Connection Architecture
226+
227+
The connector uses a unified HTTP client that maintains:
228+
- **Direct Pool Manager**: For non-proxy connections
229+
- **Proxy Pool Manager**: For proxy connections
230+
- **Per-Request Routing**: Automatic selection based on target host
231+
232+
This architecture ensures optimal performance and correct proxy handling across all connector operations.

examples/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,4 @@ this example the string `ExamplePartnerTag` will be added to the the user agent
4242
- **`custom_cred_provider.py`** shows how to pass a custom credential provider to bypass connector authentication. Please install databricks-sdk prior to running this example.
4343
- **`v3_retries_query_execute.py`** shows how to enable v3 retries in connector version 2.9.x including how to enable retries for non-default retry cases.
4444
- **`parameters.py`** shows how to use parameters in native and inline modes.
45+
- **`proxy_authentication.py`** demonstrates how to connect through proxy servers using different authentication methods including basic authentication and Kerberos/Negotiate authentication.

0 commit comments

Comments
 (0)