microsoft
diff --git a/‎mssql_python/connection.py‎
Lines changed: 135 additions & 9 deletions b/‎mssql_python/connection.py‎
Lines changed: 135 additions & 9 deletions
diff --git a/‎mssql_python/cursor.py‎
Lines changed: 68 additions & 5 deletions b/‎mssql_python/cursor.py‎
Lines changed: 68 additions & 5 deletions
@@ -54,7 +54,12 @@
 INFO_TYPE_STRING_THRESHOLD: int = 10000
 
 # UTF-16 encoding variants that should use SQL_WCHAR by default
-UTF16_ENCODINGS: frozenset[str] = frozenset(["utf-16", "utf-16le", "utf-16be"])
+# Note: plain "utf-16" (with BOM) is deliberately NOT included; it is rejected for SQL_WCHAR
+UTF16_ENCODINGS: frozenset[str] = frozenset(["utf-16le", "utf-16be"])
+
+# Characters allowed in an encoding name: ASCII letters, digits, dash and underscore only
+import string  # NOTE(review): mid-module import; consider moving it to the top-of-file imports
+VALID_ENCODING_CHARS: frozenset[str] = frozenset(string.ascii_letters + string.digits + '-_')
 
 
 def _validate_encoding(encoding: str) -> bool:
@@ -70,7 +75,17 @@ def _validate_encoding(encoding: str) -> bool:
     Note:
         Uses LRU cache to avoid repeated expensive codecs.lookup() calls.
         Cache size is limited to 128 entries which should cover most use cases.
+        Also validates that encoding name only contains safe characters.
     """
+    # First check for dangerous characters (security validation)
+    if not all(c in VALID_ENCODING_CHARS for c in encoding):
+        return False
+    
+    # Check length limit (prevent DOS)
+    if len(encoding) > 100:
+        return False
+    
+    # Then check if it's a valid Python codec
     try:
         codecs.lookup(encoding)
         return True
@@ -226,6 +241,11 @@ def __init__(
         # Initialize output converters dictionary and its lock for thread safety
         self._output_converters = {}
         self._converters_lock = threading.Lock()
+        
+        # Initialize encoding/decoding settings lock for thread safety
+        # This lock protects both _encoding_settings and _decoding_settings dictionaries
+        # to prevent race conditions when multiple threads are reading/writing encoding settings
+        self._encoding_lock = threading.RLock()  # RLock allows recursive locking
 
         # Initialize search escape character
         self._searchescape = None
@@ -429,6 +449,20 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
         # Normalize encoding to casefold for more robust Unicode handling
         encoding = encoding.casefold()
         logger.debug("setencoding: Encoding normalized to %s", encoding)
+        
+        # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
+        if encoding == "utf-16" and ctype == ConstantsDDBC.SQL_WCHAR.value:
+            logger.debug(
+                "warning",
+                "utf-16 with BOM rejected for SQL_WCHAR",
+            )
+            raise ProgrammingError(
+                driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
+                ddbc_error=(
+                    "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
+                    "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
+                ),
+            )
 
         # Set default ctype based on encoding if not provided
         if ctype is None:
@@ -455,9 +489,34 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
                     f"SQL_WCHAR ({ConstantsDDBC.SQL_WCHAR.value})"
                 ),
             )
+        
+        # Validate that SQL_WCHAR ctype only used with UTF-16 encodings (not utf-16 with BOM)
+        if ctype == ConstantsDDBC.SQL_WCHAR.value:
+            if encoding == "utf-16":
+                raise ProgrammingError(
+                    driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
+                    ddbc_error=(
+                        "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
+                        "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
+                    ),
+                )
+            elif encoding not in UTF16_ENCODINGS:
+                logger.debug(
+                    "warning",
+                    "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype",
+                    sanitize_user_input(encoding),
+                )
+                raise ProgrammingError(
+                    driver_error=f"SQL_WCHAR only supports UTF-16 encodings",
+                    ddbc_error=(
+                        f"Cannot use encoding '{encoding}' with SQL_WCHAR. "
+                        f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
+                    ),
+                )
 
-        # Store the encoding settings
-        self._encoding_settings = {"encoding": encoding, "ctype": ctype}
+        # Store the encoding settings (thread-safe with lock)
+        with self._encoding_lock:
+            self._encoding_settings = {"encoding": encoding, "ctype": ctype}
 
         # Log with sanitized values for security
         logger.debug(
@@ -469,7 +528,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
 
     def getencoding(self) -> Dict[str, Union[str, int]]:
         """
-        Gets the current text encoding settings.
+        Gets the current text encoding settings (thread-safe).
 
         Returns:
             dict: A dictionary containing 'encoding' and 'ctype' keys.
@@ -481,14 +540,19 @@ def getencoding(self) -> Dict[str, Union[str, int]]:
             settings = cnxn.getencoding()
             print(f"Current encoding: {settings['encoding']}")
             print(f"Current ctype: {settings['ctype']}")
+            
+        Note:
+            This method is thread-safe and can be called from multiple threads concurrently.
         """
         if self._closed:
             raise InterfaceError(
                 driver_error="Connection is closed",
                 ddbc_error="Connection is closed",
             )
 
-        return self._encoding_settings.copy()
+        # Thread-safe read with lock to prevent race conditions
+        with self._encoding_lock:
+            return self._encoding_settings.copy()
 
     def setdecoding(
         self, sqltype: int, encoding: Optional[str] = None, ctype: Optional[int] = None
@@ -574,6 +638,38 @@ def setdecoding(
 
         # Normalize encoding to lowercase for consistency
         encoding = encoding.lower()
+        
+        # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
+        if sqltype == ConstantsDDBC.SQL_WCHAR.value and encoding == "utf-16":
+            logger.debug(
+                "warning",
+                "utf-16 with BOM rejected for SQL_WCHAR",
+            )
+            raise ProgrammingError(
+                driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
+                ddbc_error=(
+                    "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
+                    "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
+                ),
+            )
+
+        # Validate SQL_WCHAR only supports UTF-16 encodings (SQL_WMETADATA is more flexible)
+        if sqltype == ConstantsDDBC.SQL_WCHAR.value and encoding not in UTF16_ENCODINGS:
+            logger.debug(
+                "warning",
+                "Non-UTF-16 encoding %s attempted with SQL_WCHAR sqltype",
+                sanitize_user_input(encoding),
+            )
+            raise ProgrammingError(
+                driver_error=f"SQL_WCHAR only supports UTF-16 encodings",
+                ddbc_error=(
+                    f"Cannot use encoding '{encoding}' with SQL_WCHAR. "
+                    f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
+                ),
+            )
+        
+        # SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
+        # No restriction needed here - let users configure as needed
 
         # Set default ctype based on encoding if not provided
         if ctype is None:
@@ -597,9 +693,34 @@ def setdecoding(
                     f"SQL_WCHAR ({ConstantsDDBC.SQL_WCHAR.value})"
                 ),
             )
+        
+        # Validate that SQL_WCHAR ctype only used with UTF-16 encodings (not utf-16 with BOM)
+        if ctype == ConstantsDDBC.SQL_WCHAR.value:
+            if encoding == "utf-16":
+                raise ProgrammingError(
+                    driver_error="UTF-16 with Byte Order Mark not supported for SQL_WCHAR",
+                    ddbc_error=(
+                        "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
+                        "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
+                    ),
+                )
+            elif encoding not in UTF16_ENCODINGS:
+                logger.debug(
+                    "warning",
+                    "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype",
+                    sanitize_user_input(encoding),
+                )
+                raise ProgrammingError(
+                    driver_error=f"SQL_WCHAR ctype only supports UTF-16 encodings",
+                    ddbc_error=(
+                        f"Cannot use encoding '{encoding}' with SQL_WCHAR ctype. "
+                        f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
+                    ),
+                )
 
-        # Store the decoding settings for the specified sqltype
-        self._decoding_settings[sqltype] = {"encoding": encoding, "ctype": ctype}
+        # Store the decoding settings for the specified sqltype (thread-safe with lock)
+        with self._encoding_lock:
+            self._decoding_settings[sqltype] = {"encoding": encoding, "ctype": ctype}
 
         # Log with sanitized values for security
         sqltype_name = {
@@ -618,7 +739,7 @@ def setdecoding(
 
     def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
         """
-        Gets the current text decoding settings for the specified SQL type.
+        Gets the current text decoding settings for the specified SQL type (thread-safe).
 
         Args:
             sqltype (int): The SQL type to get settings for: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
@@ -634,6 +755,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
             settings = cnxn.getdecoding(mssql_python.SQL_CHAR)
             print(f"SQL_CHAR encoding: {settings['encoding']}")
             print(f"SQL_CHAR ctype: {settings['ctype']}")
+            
+        Note:
+            This method is thread-safe and can be called from multiple threads concurrently.
         """
         if self._closed:
             raise InterfaceError(
@@ -657,7 +781,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
                 ),
             )
 
-        return self._decoding_settings[sqltype].copy()
+        # Thread-safe read with lock to prevent race conditions
+        with self._encoding_lock:
+            return self._decoding_settings[sqltype].copy()
 
     def set_attr(self, attribute: int, value: Union[int, str, bytes, bytearray]) -> None:
         """
 
@@ -20,7 +20,7 @@
 from mssql_python.helpers import check_error
 from mssql_python.logging import logger
 from mssql_python import ddbc_bindings
-from mssql_python.exceptions import InterfaceError, NotSupportedError, ProgrammingError
+from mssql_python.exceptions import InterfaceError, NotSupportedError, ProgrammingError, OperationalError, DatabaseError
 from mssql_python.row import Row
 from mssql_python import get_settings
 
@@ -285,6 +285,53 @@ def _get_numeric_data(self, param: decimal.Decimal) -> Any:
         numeric_data.val = bytes(byte_array)
         return numeric_data
 
+    def _get_encoding_settings(self):
+        """
+        Get the encoding settings from the connection.
+
+        Asks the connection via ``getencoding()``; falls back to utf-16le with
+        SQL_WCHAR when the connection has no such method or the call raises
+        OperationalError/DatabaseError. Any other exception propagates.
+
+        Returns:
+            dict: A dictionary with 'encoding' and 'ctype' keys.
+        """
+        # Single definition of the fallback; a fresh dict is built on every call.
+        defaults = {"encoding": "utf-16le", "ctype": ddbc_sql_const.SQL_WCHAR.value}
+        if not hasattr(self._connection, "getencoding"):
+            return defaults
+        try:
+            return self._connection.getencoding()
+        except (OperationalError, DatabaseError) as db_error:
+            # Use the module-level logger (lazy %-args) like the rest of this file.
+            logger.warning(
+                "Failed to get encoding settings from connection due to database error: %s",
+                db_error,
+            )
+            return defaults
+
+    def _get_decoding_settings(self, sql_type):
+        """
+        Get decoding settings for a specific SQL type.
+
+        Args:
+            sql_type: SQL type constant (SQL_CHAR, SQL_WCHAR, etc.)
+
+        Returns:
+            dict: The connection's settings; if getdecoding() raises OperationalError or
+            DatabaseError, utf-16le/SQL_WCHAR for SQL_WCHAR and utf-8/SQL_CHAR otherwise.
+        """
+        try:
+            return self._connection.getdecoding(sql_type)
+        except (OperationalError, DatabaseError) as db_error:
+            logger.warning(
+                "Failed to get decoding settings for SQL type %s due to database error: %s",
+                sql_type,
+                db_error,
+            )
+            if sql_type == ddbc_sql_const.SQL_WCHAR.value:
+                return {"encoding": "utf-16le", "ctype": ddbc_sql_const.SQL_WCHAR.value}
+            return {"encoding": "utf-8", "ctype": ddbc_sql_const.SQL_CHAR.value}
+
     def _map_sql_type(  # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals,too-many-return-statements,too-many-branches
         self,
         param: Any,
@@ -1132,6 +1179,9 @@ def execute(  # pylint: disable=too-many-locals,too-many-branches,too-many-state
         # Clear any previous messages
         self.messages = []
 
+        # Getting encoding setting
+        encoding_settings = self._get_encoding_settings()
+
         # Apply timeout if set (non-zero)
         if self._timeout > 0:
             logger.debug("execute: Setting query timeout=%d seconds", self._timeout)
@@ -1202,6 +1252,7 @@ def execute(  # pylint: disable=too-many-locals,too-many-branches,too-many-state
             parameters_type,
             self.is_stmt_prepared,
             use_prepare,
+            encoding_settings
         )
         # Check return code
         try:
@@ -2027,6 +2078,9 @@ def executemany(  # pylint: disable=too-many-locals,too-many-branches,too-many-s
         # Now transpose the processed parameters
         columnwise_params, row_count = self._transpose_rowwise_to_columnwise(processed_parameters)
 
+        # Get encoding settings
+        encoding_settings = self._get_encoding_settings()
+
         # Add debug logging
         logger.debug(
             "Executing batch query with %d parameter sets:\n%s",
@@ -2038,7 +2092,7 @@ def executemany(  # pylint: disable=too-many-locals,too-many-branches,too-many-s
         )
 
         ret = ddbc_bindings.SQLExecuteMany(
-            self.hstmt, operation, columnwise_params, parameters_type, row_count
+            self.hstmt, operation, columnwise_params, parameters_type, row_count, encoding_settings
         )
 
         # Capture any diagnostic messages after execution
@@ -2070,10 +2124,13 @@ def fetchone(self) -> Union[None, Row]:
         """
         self._check_closed()  # Check if the cursor is closed
 
+        char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
+        wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
+
         # Fetch raw data
         row_data = []
         try:
-            ret = ddbc_bindings.DDBCSQLFetchOne(self.hstmt, row_data)
+            ret = ddbc_bindings.DDBCSQLFetchOne(self.hstmt, row_data, char_decoding.get('encoding', 'utf-8'), wchar_decoding.get('encoding', 'utf-16le'))
 
             if self.hstmt:
                 self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))
@@ -2121,10 +2178,13 @@ def fetchmany(self, size: Optional[int] = None) -> List[Row]:
         if size <= 0:
             return []
 
+        char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
+        wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
+
         # Fetch raw data
         rows_data = []
         try:
-            _ = ddbc_bindings.DDBCSQLFetchMany(self.hstmt, rows_data, size)
+            ret = ddbc_bindings.DDBCSQLFetchMany(self.hstmt, rows_data, size, char_decoding.get('encoding', 'utf-8'), wchar_decoding.get('encoding', 'utf-16le'))
 
             if self.hstmt:
                 self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))
@@ -2164,10 +2224,13 @@ def fetchall(self) -> List[Row]:
         if not self._has_result_set and self.description:
             self._reset_rownumber()
 
+        char_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_CHAR.value)
+        wchar_decoding = self._get_decoding_settings(ddbc_sql_const.SQL_WCHAR.value)
+
         # Fetch raw data
         rows_data = []
         try:
-            _ = ddbc_bindings.DDBCSQLFetchAll(self.hstmt, rows_data)
+            ret = ddbc_bindings.DDBCSQLFetchAll(self.hstmt, rows_data, char_decoding.get('encoding', 'utf-8'), wchar_decoding.get('encoding', 'utf-16le'))
 
             if self.hstmt:
                 self.messages.extend(ddbc_bindings.DDBCSQLGetAllDiagRecords(self.hstmt))