5454INFO_TYPE_STRING_THRESHOLD : int = 10000
5555
5656# UTF-16 encoding variants that should use SQL_WCHAR by default
57- UTF16_ENCODINGS : frozenset [str ] = frozenset (["utf-16" , "utf-16le" , "utf-16be" ])
57+ # Note: "utf-16" with BOM is NOT included as it's problematic for SQL_WCHAR
58+ UTF16_ENCODINGS : frozenset [str ] = frozenset (["utf-16le" , "utf-16be" ])
59+
60+ # Valid encoding characters (alphanumeric, dash, underscore only)
61+ import string
62+ VALID_ENCODING_CHARS : frozenset [str ] = frozenset (string .ascii_letters + string .digits + '-_' )
5863
5964
6065def _validate_encoding (encoding : str ) -> bool :
@@ -70,7 +75,17 @@ def _validate_encoding(encoding: str) -> bool:
7075 Note:
7176 Uses LRU cache to avoid repeated expensive codecs.lookup() calls.
7277 Cache size is limited to 128 entries which should cover most use cases.
78+ Also validates that encoding name only contains safe characters.
7379 """
80+ # First check for dangerous characters (security validation)
81+ if not all (c in VALID_ENCODING_CHARS for c in encoding ):
82+ return False
83+
84+ # Check length limit (prevent DoS)
85+ if len (encoding ) > 100 :
86+ return False
87+
88+ # Then check if it's a valid Python codec
7489 try :
7590 codecs .lookup (encoding )
7691 return True
@@ -226,6 +241,11 @@ def __init__(
226241 # Initialize output converters dictionary and its lock for thread safety
227242 self ._output_converters = {}
228243 self ._converters_lock = threading .Lock ()
244+
245+ # Initialize encoding/decoding settings lock for thread safety
246+ # This lock protects both _encoding_settings and _decoding_settings dictionaries
247+ # to prevent race conditions when multiple threads are reading/writing encoding settings
248+ self ._encoding_lock = threading .RLock () # RLock allows recursive locking
229249
230250 # Initialize search escape character
231251 self ._searchescape = None
@@ -429,6 +449,20 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
429449 # Normalize the encoding name with casefold() for more robust Unicode handling
430450 encoding = encoding .casefold ()
431451 logger .debug ("setencoding: Encoding normalized to %s" , encoding )
452+
453+ # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
454+ if encoding == "utf-16" and ctype == ConstantsDDBC .SQL_WCHAR .value :
455+ logger .debug (
456+ "warning" ,
457+ "utf-16 with BOM rejected for SQL_WCHAR" ,
458+ )
459+ raise ProgrammingError (
460+ driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
461+ ddbc_error = (
462+ "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
463+ "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
464+ ),
465+ )
432466
433467 # Set default ctype based on encoding if not provided
434468 if ctype is None :
@@ -455,9 +489,34 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
455489 f"SQL_WCHAR ({ ConstantsDDBC .SQL_WCHAR .value } )"
456490 ),
457491 )
492+
493+ # Validate that the SQL_WCHAR ctype is only used with UTF-16 encodings (not utf-16 with BOM)
494+ if ctype == ConstantsDDBC .SQL_WCHAR .value :
495+ if encoding == "utf-16" :
496+ raise ProgrammingError (
497+ driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
498+ ddbc_error = (
499+ "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
500+ "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
501+ ),
502+ )
503+ elif encoding not in UTF16_ENCODINGS :
504+ logger .debug (
505+ "warning" ,
506+ "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype" ,
507+ sanitize_user_input (encoding ),
508+ )
509+ raise ProgrammingError (
510+ driver_error = f"SQL_WCHAR only supports UTF-16 encodings" ,
511+ ddbc_error = (
512+ f"Cannot use encoding '{ encoding } ' with SQL_WCHAR. "
513+ f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
514+ ),
515+ )
458516
459- # Store the encoding settings
460- self ._encoding_settings = {"encoding" : encoding , "ctype" : ctype }
517+ # Store the encoding settings (thread-safe with lock)
518+ with self ._encoding_lock :
519+ self ._encoding_settings = {"encoding" : encoding , "ctype" : ctype }
461520
462521 # Log with sanitized values for security
463522 logger .debug (
@@ -469,7 +528,7 @@ def setencoding(self, encoding: Optional[str] = None, ctype: Optional[int] = Non
469528
470529 def getencoding (self ) -> Dict [str , Union [str , int ]]:
471530 """
472- Gets the current text encoding settings.
531+ Gets the current text encoding settings (thread-safe) .
473532
474533 Returns:
475534 dict: A dictionary containing 'encoding' and 'ctype' keys.
@@ -481,14 +540,19 @@ def getencoding(self) -> Dict[str, Union[str, int]]:
481540 settings = cnxn.getencoding()
482541 print(f"Current encoding: {settings['encoding']}")
483542 print(f"Current ctype: {settings['ctype']}")
543+
544+ Note:
545+ This method is thread-safe and can be called from multiple threads concurrently.
484546 """
485547 if self ._closed :
486548 raise InterfaceError (
487549 driver_error = "Connection is closed" ,
488550 ddbc_error = "Connection is closed" ,
489551 )
490552
491- return self ._encoding_settings .copy ()
553+ # Thread-safe read with lock to prevent race conditions
554+ with self ._encoding_lock :
555+ return self ._encoding_settings .copy ()
492556
493557 def setdecoding (
494558 self , sqltype : int , encoding : Optional [str ] = None , ctype : Optional [int ] = None
@@ -574,6 +638,38 @@ def setdecoding(
574638
575639 # Normalize encoding to lowercase for consistency
576640 encoding = encoding .lower ()
641+
642+ # Reject 'utf-16' with BOM for SQL_WCHAR (ambiguous byte order)
643+ if sqltype == ConstantsDDBC .SQL_WCHAR .value and encoding == "utf-16" :
644+ logger .debug (
645+ "warning" ,
646+ "utf-16 with BOM rejected for SQL_WCHAR" ,
647+ )
648+ raise ProgrammingError (
649+ driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
650+ ddbc_error = (
651+ "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
652+ "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
653+ ),
654+ )
655+
656+ # Validate that the SQL_WCHAR sqltype is only used with UTF-16 encodings (SQL_WMETADATA is more flexible)
657+ if sqltype == ConstantsDDBC .SQL_WCHAR .value and encoding not in UTF16_ENCODINGS :
658+ logger .debug (
659+ "warning" ,
660+ "Non-UTF-16 encoding %s attempted with SQL_WCHAR sqltype" ,
661+ sanitize_user_input (encoding ),
662+ )
663+ raise ProgrammingError (
664+ driver_error = f"SQL_WCHAR only supports UTF-16 encodings" ,
665+ ddbc_error = (
666+ f"Cannot use encoding '{ encoding } ' with SQL_WCHAR. "
667+ f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
668+ ),
669+ )
670+
671+ # SQL_WMETADATA can use any valid encoding (UTF-8, UTF-16, etc.)
672+ # No restriction needed here - let users configure as needed
577673
578674 # Set default ctype based on encoding if not provided
579675 if ctype is None :
@@ -597,9 +693,34 @@ def setdecoding(
597693 f"SQL_WCHAR ({ ConstantsDDBC .SQL_WCHAR .value } )"
598694 ),
599695 )
696+
697+ # Validate that the SQL_WCHAR ctype is only used with UTF-16 encodings (not utf-16 with BOM)
698+ if ctype == ConstantsDDBC .SQL_WCHAR .value :
699+ if encoding == "utf-16" :
700+ raise ProgrammingError (
701+ driver_error = "UTF-16 with Byte Order Mark not supported for SQL_WCHAR" ,
702+ ddbc_error = (
703+ "Cannot use 'utf-16' encoding with SQL_WCHAR due to Byte Order Mark ambiguity. "
704+ "Use 'utf-16le' or 'utf-16be' instead for explicit byte order."
705+ ),
706+ )
707+ elif encoding not in UTF16_ENCODINGS :
708+ logger .debug (
709+ "warning" ,
710+ "Non-UTF-16 encoding %s attempted with SQL_WCHAR ctype" ,
711+ sanitize_user_input (encoding ),
712+ )
713+ raise ProgrammingError (
714+ driver_error = f"SQL_WCHAR ctype only supports UTF-16 encodings" ,
715+ ddbc_error = (
716+ f"Cannot use encoding '{ encoding } ' with SQL_WCHAR ctype. "
717+ f"SQL_WCHAR requires UTF-16 encodings (utf-16le, utf-16be)"
718+ ),
719+ )
600720
601- # Store the decoding settings for the specified sqltype
602- self ._decoding_settings [sqltype ] = {"encoding" : encoding , "ctype" : ctype }
721+ # Store the decoding settings for the specified sqltype (thread-safe with lock)
722+ with self ._encoding_lock :
723+ self ._decoding_settings [sqltype ] = {"encoding" : encoding , "ctype" : ctype }
603724
604725 # Log with sanitized values for security
605726 sqltype_name = {
@@ -618,7 +739,7 @@ def setdecoding(
618739
619740 def getdecoding (self , sqltype : int ) -> Dict [str , Union [str , int ]]:
620741 """
621- Gets the current text decoding settings for the specified SQL type.
742+ Gets the current text decoding settings for the specified SQL type (thread-safe) .
622743
623744 Args:
624745 sqltype (int): The SQL type to get settings for: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
@@ -634,6 +755,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
634755 settings = cnxn.getdecoding(mssql_python.SQL_CHAR)
635756 print(f"SQL_CHAR encoding: {settings['encoding']}")
636757 print(f"SQL_CHAR ctype: {settings['ctype']}")
758+
759+ Note:
760+ This method is thread-safe and can be called from multiple threads concurrently.
637761 """
638762 if self ._closed :
639763 raise InterfaceError (
@@ -657,7 +781,9 @@ def getdecoding(self, sqltype: int) -> Dict[str, Union[str, int]]:
657781 ),
658782 )
659783
660- return self ._decoding_settings [sqltype ].copy ()
784+ # Thread-safe read with lock to prevent race conditions
785+ with self ._encoding_lock :
786+ return self ._decoding_settings [sqltype ].copy ()
661787
662788 def set_attr (self , attribute : int , value : Union [int , str , bytes , bytearray ]) -> None :
663789 """
0 commit comments