Skip to content

Commit 681b2e5

Browse files
committed
refactor: streamline type extraction in python_value_to_scalar_value function
1 parent: 6fbafcd · commit: 681b2e5

File tree

1 file changed

+81 −61 lines changed

src/dataframe.rs

Lines changed: 81 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -890,94 +890,114 @@ fn python_value_to_scalar_value(value: &PyObject, py: Python) -> PyDataFusionRes
890890
return Err(PyDataFusionError::Common(msg.to_string()));
891891
}
892892

893-
// Integer types - try different sizes
893+
// Try extracting different types in sequence
894+
if let Some(scalar) = try_extract_numeric(value, py) {
895+
return Ok(scalar);
896+
}
897+
898+
if let Ok(val) = value.extract::<bool>(py) {
899+
return Ok(ScalarValue::Boolean(Some(val)));
900+
}
901+
902+
if let Ok(val) = value.extract::<String>(py) {
903+
return Ok(ScalarValue::Utf8(Some(val)));
904+
}
905+
906+
if let Some(scalar) = try_extract_datetime(value, py) {
907+
return Ok(scalar);
908+
}
909+
910+
if let Some(scalar) = try_extract_date(value, py) {
911+
return Ok(scalar);
912+
}
913+
914+
// Fallback to string representation
915+
try_convert_to_string(value, py)
916+
}
917+
918+
/// Try to extract numeric types from a Python object
919+
fn try_extract_numeric(value: &PyObject, py: Python) -> Option<ScalarValue> {
920+
// Integer types
894921
if let Ok(val) = value.extract::<i64>(py) {
895-
return Ok(ScalarValue::Int64(Some(val)));
922+
return Some(ScalarValue::Int64(Some(val)));
896923
} else if let Ok(val) = value.extract::<i32>(py) {
897-
return Ok(ScalarValue::Int32(Some(val)));
924+
return Some(ScalarValue::Int32(Some(val)));
898925
} else if let Ok(val) = value.extract::<i16>(py) {
899-
return Ok(ScalarValue::Int16(Some(val)));
926+
return Some(ScalarValue::Int16(Some(val)));
900927
} else if let Ok(val) = value.extract::<i8>(py) {
901-
return Ok(ScalarValue::Int8(Some(val)));
928+
return Some(ScalarValue::Int8(Some(val)));
902929
}
903930

904931
// Unsigned integer types
905932
if let Ok(val) = value.extract::<u64>(py) {
906-
return Ok(ScalarValue::UInt64(Some(val)));
933+
return Some(ScalarValue::UInt64(Some(val)));
907934
} else if let Ok(val) = value.extract::<u32>(py) {
908-
return Ok(ScalarValue::UInt32(Some(val)));
935+
return Some(ScalarValue::UInt32(Some(val)));
909936
} else if let Ok(val) = value.extract::<u16>(py) {
910-
return Ok(ScalarValue::UInt16(Some(val)));
937+
return Some(ScalarValue::UInt16(Some(val)));
911938
} else if let Ok(val) = value.extract::<u8>(py) {
912-
return Ok(ScalarValue::UInt8(Some(val)));
939+
return Some(ScalarValue::UInt8(Some(val)));
913940
}
914941

915942
// Float types
916943
if let Ok(val) = value.extract::<f64>(py) {
917-
return Ok(ScalarValue::Float64(Some(val)));
944+
return Some(ScalarValue::Float64(Some(val)));
918945
} else if let Ok(val) = value.extract::<f32>(py) {
919-
return Ok(ScalarValue::Float32(Some(val)));
946+
return Some(ScalarValue::Float32(Some(val)));
920947
}
921948

922-
// Boolean
923-
if let Ok(val) = value.extract::<bool>(py) {
924-
return Ok(ScalarValue::Boolean(Some(val)));
925-
}
949+
None
950+
}
926951

927-
// String types
928-
if let Ok(val) = value.extract::<String>(py) {
929-
return Ok(ScalarValue::Utf8(Some(val)));
930-
}
952+
/// Try to extract datetime from a Python object
953+
fn try_extract_datetime(value: &PyObject, py: Python) -> Option<ScalarValue> {
954+
let datetime_result = py
955+
.import("datetime")
956+
.and_then(|m| m.getattr("datetime"))
957+
.ok()?;
931958

932-
// Handle datetime types
933-
let datetime_result = py.import("datetime").and_then(|m| m.getattr("datetime"));
934-
935-
if let Ok(datetime_cls) = datetime_result {
936-
if let Ok(true) = value.is_instance(datetime_cls) {
937-
if let Ok(dt) = value.cast_as::<pyo3::types::PyDateTime>(py) {
938-
// Convert Python datetime to timestamp in nanoseconds
939-
let year = dt.get_year() as i32;
940-
let month = dt.get_month() as u8;
941-
let day = dt.get_day() as u8;
942-
let hour = dt.get_hour() as u8;
943-
let minute = dt.get_minute() as u8;
944-
let second = dt.get_second() as u8;
945-
let micro = dt.get_microsecond() as u32;
946-
947-
// Use DataFusion's timestamp conversion logic
948-
if let Ok(ts) =
949-
date_to_timestamp(year, month, day, hour, minute, second, micro * 1000)
950-
{
951-
return Ok(ScalarValue::TimestampNanosecond(Some(ts), None));
952-
}
953-
}
959+
if value.is_instance(datetime_result).ok()? {
960+
let dt = value.cast_as::<pyo3::types::PyDateTime>(py).ok()?;
954961

955-
let msg = "Failed to convert Python datetime";
956-
return Err(PyDataFusionError::Common(msg.to_string()));
957-
}
962+
// Extract datetime components
963+
let year = dt.get_year() as i32;
964+
let month = dt.get_month() as u8;
965+
let day = dt.get_day() as u8;
966+
let hour = dt.get_hour() as u8;
967+
let minute = dt.get_minute() as u8;
968+
let second = dt.get_second() as u8;
969+
let micro = dt.get_microsecond() as u32;
970+
971+
// Convert to timestamp
972+
let ts = date_to_timestamp(year, month, day, hour, minute, second, micro * 1000).ok()?;
973+
return Some(ScalarValue::TimestampNanosecond(Some(ts), None));
958974
}
959975

960-
// Check for date (not datetime)
961-
let date_result = py.import("datetime").and_then(|m| m.getattr("date"));
962-
if let Ok(date_cls) = date_result {
963-
if let Ok(true) = value.is_instance(date_cls) {
964-
if let Ok(date) = value.cast_as::<pyo3::types::PyDate>(py) {
965-
let year = date.get_year() as i32;
966-
let month = date.get_month() as u8;
967-
let day = date.get_day() as u8;
976+
None
977+
}
968978

969-
// Calculate days since Unix epoch (1970-01-01)
970-
if let Ok(days) = date_to_days_since_epoch(year, month, day) {
971-
return Ok(ScalarValue::Date32(Some(days)));
972-
}
973-
}
979+
/// Try to extract date from a Python object
980+
fn try_extract_date(value: &PyObject, py: Python) -> Option<ScalarValue> {
981+
let date_result = py.import("datetime").and_then(|m| m.getattr("date")).ok()?;
974982

975-
let msg = "Failed to convert Python date";
976-
return Err(PyDataFusionError::Common(msg.to_string()));
977-
}
983+
if value.is_instance(date_result).ok()? {
984+
let date = value.cast_as::<pyo3::types::PyDate>(py).ok()?;
985+
986+
// Extract date components
987+
let year = date.get_year() as i32;
988+
let month = date.get_month() as u8;
989+
let day = date.get_day() as u8;
990+
991+
// Convert to days since epoch
992+
let days = date_to_days_since_epoch(year, month, day).ok()?;
993+
return Some(ScalarValue::Date32(Some(days)));
978994
}
979995

980-
// Try to convert to string as fallback
996+
None
997+
}
998+
999+
/// Try to convert a Python object to string
1000+
fn try_convert_to_string(value: &PyObject, py: Python) -> PyDataFusionResult<ScalarValue> {
9811001
match value.str(py) {
9821002
Ok(py_str) => match py_str.to_string() {
9831003
Ok(s) => Ok(ScalarValue::Utf8(Some(s))),

0 commit comments

Comments
 (0)