From fa2eab47559c5915c50ddff830dd6cdbf4207c59 Mon Sep 17 00:00:00 2001 From: Gaurav Sharma Date: Fri, 30 Jan 2026 17:55:37 +0530 Subject: [PATCH 1/3] feat(bulkcopy): Replace **kwargs with explicit parameters for better IDE discoverability - All options now explicit in function signature - Pass params directly to pycore (no kwargs dict conversion) - Matches mssql-tds explicit params API Based on community feedback from discussion #414 --- mssql_python/cursor.py | 66 +++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index 84bb650d5..a73a53774 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2452,7 +2452,18 @@ def nextset(self) -> Union[bool, None]: return True def _bulkcopy( - self, table_name: str, data: Iterable[Union[Tuple, List]], **kwargs + self, + table_name: str, + data: Iterable[Union[Tuple, List]], + batch_size: Optional[int] = None, + timeout: Optional[int] = 30, + column_mappings: Optional[List[Tuple[Union[str, int], str]]] = None, + keep_identity: Optional[bool] = None, + check_constraints: Optional[bool] = None, + table_lock: Optional[bool] = None, + keep_nulls: Optional[bool] = None, + fire_triggers: Optional[bool] = None, + use_internal_transaction: Optional[bool] = None, ): # pragma: no cover """ Perform bulk copy operation for high-performance data loading. @@ -2471,20 +2482,33 @@ def _bulkcopy( - The number of values in each row must match the number of columns in the target table - **kwargs: Additional bulk copy options. + batch_size: Number of rows to send per batch. Default uses server optimal. + + timeout: Operation timeout in seconds. Default is 30. + + column_mappings: Maps source data columns to target table column names. + Each tuple is (source, target_column_name) where: + - source: Column name (str) or 0-based index (int) in the source data + - target_column_name: Name of the target column in the database table + + When omitted: Columns are mapped by ordinal position (first data + column → first table column, second → second, etc.) + + When specified: Only the mapped columns are inserted; unmapped + source columns are ignored, and unmapped target columns must + have default values or allow NULL. + + keep_identity: Preserve identity values from source data. - column_mappings (List[Tuple[int, str]], optional): - Maps source data column indices to target table column names. - Each tuple is (source_index, target_column_name) where: - - source_index: 0-based index of the column in the source data - - target_column_name: Name of the target column in the database table + check_constraints: Check constraints during bulk copy. - When omitted: Columns are mapped by ordinal position (first data - column → first table column, second → second, etc.) + table_lock: Use table-level lock instead of row-level locks. - When specified: Only the mapped columns are inserted; unmapped - source columns are ignored, and unmapped target columns must - have default values or allow NULL. + keep_nulls: Preserve null values instead of using default values. + + fire_triggers: Fire insert triggers on the target table. + + use_internal_transaction: Use an internal transaction for each batch. Returns: Dictionary with bulk copy results including: @@ -2523,10 +2547,6 @@ def _bulkcopy( f"data must be an iterable of tuples or lists, got non-iterable {type(data).__name__}" ) - # Extract and validate kwargs with defaults - batch_size = kwargs.get("batch_size", None) - timeout = kwargs.get("timeout", 30) - # Validate batch_size type and value (only if explicitly provided) if batch_size is not None: if not isinstance(batch_size, (int, float)): @@ -2599,7 +2619,19 @@ def _bulkcopy( pycore_connection = mssql_py_core.PyCoreConnection(pycore_context) pycore_cursor = pycore_connection.cursor() - result = pycore_cursor.bulkcopy(table_name, iter(data), **kwargs) + result = pycore_cursor.bulkcopy( + table_name, + iter(data), + batch_size=batch_size, + timeout=timeout, + column_mappings=column_mappings, + keep_identity=keep_identity, + check_constraints=check_constraints, + table_lock=table_lock, + keep_nulls=keep_nulls, + fire_triggers=fire_triggers, + use_internal_transaction=use_internal_transaction, + ) return result From f01756bfd3182bd1917b8be346148f1106653972 Mon Sep 17 00:00:00 2001 From: Gaurav Sharma Date: Mon, 2 Feb 2026 10:37:03 +0530 Subject: [PATCH 2/3] fix(bulkcopy): only pass non-None kwargs to Rust After Saurabh's review, Rust bulkcopy now uses direct types (bool, usize) instead of Option. Python must not pass None values - instead omit the kwarg and let PyO3 use its defaults. --- mssql_python/cursor.py | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index a73a53774..ed35a124e 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2619,18 +2619,32 @@ def _bulkcopy( pycore_connection = mssql_py_core.PyCoreConnection(pycore_context) pycore_cursor = pycore_connection.cursor() + # Build kwargs dynamically - only pass non-None values + # This lets PyO3/Rust use its defined defaults for unspecified params + bulkcopy_kwargs = {} + if batch_size is not None: + bulkcopy_kwargs["batch_size"] = batch_size + if timeout is not None: + bulkcopy_kwargs["timeout"] = timeout + if column_mappings is not None: + bulkcopy_kwargs["column_mappings"] = column_mappings + if keep_identity is not None: + bulkcopy_kwargs["keep_identity"] = keep_identity + if check_constraints is not None: + bulkcopy_kwargs["check_constraints"] = check_constraints + if table_lock is not None: + bulkcopy_kwargs["table_lock"] = table_lock + if keep_nulls is not None: + bulkcopy_kwargs["keep_nulls"] = keep_nulls + if fire_triggers is not None: + bulkcopy_kwargs["fire_triggers"] = fire_triggers + if use_internal_transaction is not None: + bulkcopy_kwargs["use_internal_transaction"] = use_internal_transaction + result = pycore_cursor.bulkcopy( table_name, iter(data), - batch_size=batch_size, - timeout=timeout, - column_mappings=column_mappings, - keep_identity=keep_identity, - check_constraints=check_constraints, - table_lock=table_lock, - keep_nulls=keep_nulls, - fire_triggers=fire_triggers, - use_internal_transaction=use_internal_transaction, + **bulkcopy_kwargs, ) return result From a1994226f71b5ef9718cae07c91cd7c0cc3f2bb2 Mon Sep 17 00:00:00 2001 From: Gaurav Sharma Date: Mon, 2 Feb 2026 12:50:26 +0530 Subject: [PATCH 3/3] Refactor _bulkcopy to explicit params with proper defaults - batch_size: int = 0 (0 means server optimal) - timeout: int = 30 - column_mappings: Optional[Union[List[str], List[Tuple[int, str]]]] = None - All boolean flags: bool = False (not Optional[bool] = None) Updated docstring with two column_mappings formats: - Simple: List[str] - position = source index - Advanced: List[Tuple[int, str]] - explicit (index, column) mapping Simplified bulkcopy call to pass params directly (no kwargs dict) since Rust API now uses explicit defaults per Saurabh's review. --- mssql_python/cursor.py | 90 +++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/mssql_python/cursor.py b/mssql_python/cursor.py index ed35a124e..3dd7aa283 100644 --- a/mssql_python/cursor.py +++ b/mssql_python/cursor.py @@ -2455,15 +2455,15 @@ def _bulkcopy( self, table_name: str, data: Iterable[Union[Tuple, List]], - batch_size: Optional[int] = None, - timeout: Optional[int] = 30, - column_mappings: Optional[List[Tuple[Union[str, int], str]]] = None, - keep_identity: Optional[bool] = None, - check_constraints: Optional[bool] = None, - table_lock: Optional[bool] = None, - keep_nulls: Optional[bool] = None, - fire_triggers: Optional[bool] = None, - use_internal_transaction: Optional[bool] = None, + batch_size: int = 0, + timeout: int = 30, + column_mappings: Optional[Union[List[str], List[Tuple[int, str]]]] = None, + keep_identity: bool = False, + check_constraints: bool = False, + table_lock: bool = False, + keep_nulls: bool = False, + fire_triggers: bool = False, + use_internal_transaction: bool = False, ): # pragma: no cover """ Perform bulk copy operation for high-performance data loading. @@ -2482,22 +2482,27 @@ def _bulkcopy( - The number of values in each row must match the number of columns in the target table - batch_size: Number of rows to send per batch. Default uses server optimal. + batch_size: Number of rows to send per batch. Default 0 uses server optimal. timeout: Operation timeout in seconds. Default is 30. column_mappings: Maps source data columns to target table column names. - Each tuple is (source, target_column_name) where: - - source: Column name (str) or 0-based index (int) in the source data - - target_column_name: Name of the target column in the database table + Two formats supported: + + Simple Format - List[str]: + List of destination column names in order. Position in list = source index. + Example: ['UserID', 'FirstName', 'Email'] + Maps: index 0 → UserID, index 1 → FirstName, index 2 → Email + + Advanced Format - List[Tuple[int, str]]: + Explicit index mapping. Allows skipping or reordering columns. + Each tuple is (source_index, target_column_name). + Example: [(0, 'UserID'), (1, 'FirstName'), (3, 'Email')] + Maps: index 0 → UserID, index 1 → FirstName, index 3 → Email (skips index 2) When omitted: Columns are mapped by ordinal position (first data column → first table column, second → second, etc.) - When specified: Only the mapped columns are inserted; unmapped - source columns are ignored, and unmapped target columns must - have default values or allow NULL. - keep_identity: Preserve identity values from source data. check_constraints: Check constraints during bulk copy. @@ -2547,18 +2552,17 @@ def _bulkcopy( f"data must be an iterable of tuples or lists, got non-iterable {type(data).__name__}" ) - # Validate batch_size type and value (only if explicitly provided) - if batch_size is not None: - if not isinstance(batch_size, (int, float)): - raise TypeError( - f"batch_size must be a positive integer, got {type(batch_size).__name__}" - ) - if batch_size <= 0: - raise ValueError(f"batch_size must be positive, got {batch_size}") + # Validate batch_size type and value (0 means server optimal) + if not isinstance(batch_size, int): + raise TypeError( + f"batch_size must be a non-negative integer, got {type(batch_size).__name__}" + ) + if batch_size < 0: + raise ValueError(f"batch_size must be non-negative, got {batch_size}") # Validate timeout type and value - if not isinstance(timeout, (int, float)): - raise TypeError(f"timeout must be a positive number, got {type(timeout).__name__}") + if not isinstance(timeout, int): + raise TypeError(f"timeout must be a positive integer, got {type(timeout).__name__}") if timeout <= 0: raise ValueError(f"timeout must be positive, got {timeout}") @@ -2619,32 +2623,18 @@ def _bulkcopy( pycore_connection = mssql_py_core.PyCoreConnection(pycore_context) pycore_cursor = pycore_connection.cursor() - # Build kwargs dynamically - only pass non-None values - # This lets PyO3/Rust use its defined defaults for unspecified params - bulkcopy_kwargs = {} - if batch_size is not None: - bulkcopy_kwargs["batch_size"] = batch_size - if timeout is not None: - bulkcopy_kwargs["timeout"] = timeout - if column_mappings is not None: - bulkcopy_kwargs["column_mappings"] = column_mappings - if keep_identity is not None: - bulkcopy_kwargs["keep_identity"] = keep_identity - if check_constraints is not None: - bulkcopy_kwargs["check_constraints"] = check_constraints - if table_lock is not None: - bulkcopy_kwargs["table_lock"] = table_lock - if keep_nulls is not None: - bulkcopy_kwargs["keep_nulls"] = keep_nulls - if fire_triggers is not None: - bulkcopy_kwargs["fire_triggers"] = fire_triggers - if use_internal_transaction is not None: - bulkcopy_kwargs["use_internal_transaction"] = use_internal_transaction - result = pycore_cursor.bulkcopy( table_name, iter(data), - **bulkcopy_kwargs, + batch_size=batch_size, + timeout=timeout, + column_mappings=column_mappings, + keep_identity=keep_identity, + check_constraints=check_constraints, + table_lock=table_lock, + keep_nulls=keep_nulls, + fire_triggers=fire_triggers, + use_internal_transaction=use_internal_transaction, ) return result