2121logger = logging .getLogger (__name__ )
2222
2323
def _safe_float_conversion(value: Any, default: float = 0.0) -> float:
    """
    Safely convert a value to a finite float, never raising.

    ``None``, empty or whitespace-only strings, values that ``float()``
    rejects, integers too large for a float, and non-finite results
    (NaN, +/-infinity) all yield ``default``. Every fallback other than
    ``None`` / blank string is logged as a warning.

    Args:
        value: Value to convert to float
        default: Value returned when ``value`` cannot be converted to a
            finite float

    Returns:
        The converted finite float, or ``default`` if conversion fails
    """
    # Function-scope import keeps this block self-contained.
    import math

    if value is None:
        return default

    is_string = isinstance(value, str)

    # Blank strings are treated as "not configured": fall back silently.
    if is_string and not value.strip():
        return default

    # type(value) renders as "<class '...'>" in the log message.
    kind = "string" if is_string else type(value)

    try:
        result = float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: float() of an int too large for a double, which
        # would otherwise escape a helper that promises a safe conversion.
        logger.warning(
            f"Could not convert {kind} '{value}' to float, using default {default}"
        )
        return default

    # float() accepts "nan" / "inf". NaN makes every ordering comparison
    # False, and int() of NaN or infinity raises, so a non-finite result is
    # treated as a failed conversion.
    if not math.isfinite(result):
        logger.warning(
            f"Ignoring non-finite {kind} '{value}', using default {default}"
        )
        return default

    return result
61+
62+
2463class AssessmentService :
2564 """Service for assessing extraction result confidence using LLMs."""
2665
@@ -127,16 +166,20 @@ def _get_attribute_confidence_threshold(
127166 # First check top-level attributes
128167 for attr in attributes :
129168 if attr .get ("name" ) == attr_name :
130- return float (attr .get ("confidence_threshold" , default_threshold ))
169+ return _safe_float_conversion (
170+ attr .get ("confidence_threshold" , default_threshold ),
171+ default_threshold ,
172+ )
131173
132174 # Check nested group attributes
133175 for attr in attributes :
134176 if attr .get ("attributeType" ) == "group" :
135177 group_attributes = attr .get ("groupAttributes" , [])
136178 for group_attr in group_attributes :
137179 if group_attr .get ("name" ) == attr_name :
138- return float (
139- group_attr .get ("confidence_threshold" , default_threshold )
180+ return _safe_float_conversion (
181+ group_attr .get ("confidence_threshold" , default_threshold ),
182+ default_threshold ,
140183 )
141184
142185 # Check nested list item attributes
@@ -146,8 +189,9 @@ def _get_attribute_confidence_threshold(
146189 item_attributes = list_template .get ("itemAttributes" , [])
147190 for item_attr in item_attributes :
148191 if item_attr .get ("name" ) == attr_name :
149- return float (
150- item_attr .get ("confidence_threshold" , default_threshold )
192+ return _safe_float_conversion (
193+ item_attr .get ("confidence_threshold" , default_threshold ),
194+ default_threshold ,
151195 )
152196
153197 # Return default if not found
@@ -247,7 +291,9 @@ def _check_confidence_alerts(
247291 """
248292 for sub_attr_name , sub_assessment in assessment_data .items ():
249293 if isinstance (sub_assessment , dict ) and "confidence" in sub_assessment :
250- confidence = sub_assessment .get ("confidence" , 0.0 )
294+ confidence = _safe_float_conversion (
295+ sub_assessment .get ("confidence" , 0.0 ), 0.0
296+ )
251297 if confidence < threshold :
252298 full_attr_name = (
253299 f"{ attr_name } .{ sub_attr_name } "
@@ -627,11 +673,17 @@ def process_document_section(self, document: Document, section_id: str) -> Docum
627673 # Get assessment configuration
628674 assessment_config = self .config .get ("assessment" , {})
629675 model_id = self .config .get ("model_id" ) or assessment_config .get ("model" )
630- temperature = float (assessment_config .get ("temperature" , 0 ))
631- top_k = float (assessment_config .get ("top_k" , 5 ))
632- top_p = float (assessment_config .get ("top_p" , 0.1 ))
676+ temperature = _safe_float_conversion (
677+ assessment_config .get ("temperature" , 0 ), 0.0
678+ )
679+ top_k = _safe_float_conversion (assessment_config .get ("top_k" , 5 ), 5.0 )
680+ top_p = _safe_float_conversion (assessment_config .get ("top_p" , 0.1 ), 0.1 )
633681 max_tokens = (
634- int (assessment_config .get ("max_tokens" , 4096 ))
682+ int (
683+ _safe_float_conversion (
684+ assessment_config .get ("max_tokens" , 4096 ), 4096
685+ )
686+ )
635687 if assessment_config .get ("max_tokens" )
636688 else None
637689 )
0 commit comments