Skip to content

Commit 4c40b85

Browse files
committed
Remove python_object_to_scalar_value code
1 parent 412029c commit 4c40b85

File tree

3 files changed

+134
-241
lines changed

3 files changed

+134
-241
lines changed

src/config.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
use pyo3::prelude::*;
1919
use pyo3::types::*;
2020

21-
use datafusion::common::ScalarValue;
2221
use datafusion::config::ConfigOptions;
2322

2423
use crate::errors::PyDataFusionResult;

src/dataframe.rs

Lines changed: 1 addition & 232 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use arrow::ffi_stream::FFI_ArrowArrayStream;
2626
use datafusion::arrow::datatypes::Schema;
2727
use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow};
2828
use datafusion::arrow::util::pretty;
29-
use datafusion::common::{ScalarValue, UnnestOptions};
29+
use datafusion::common::UnnestOptions;
3030
use datafusion::config::{CsvOptions, TableParquetOptions};
3131
use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
3232
use datafusion::datasource::TableProvider;
@@ -884,234 +884,3 @@ async fn collect_record_batches_to_display(
884884

885885
Ok((record_batches, has_more))
886886
}
887-
888-
/// Convert a Python value to a DataFusion ScalarValue
889-
fn python_value_to_scalar_value(value: &PyObject, py: Python) -> PyDataFusionResult<ScalarValue> {
890-
if value.is_none(py) {
891-
let msg = "Cannot use None as fill value";
892-
return Err(PyDataFusionError::Common(msg.to_string()));
893-
}
894-
895-
// Convert PyObject to PyAny for easier extraction
896-
let py_any: &PyAny = value.as_ref(py);
897-
898-
// Try extracting different types in sequence
899-
if let Some(scalar) = try_extract_numeric(value, py) {
900-
return Ok(scalar);
901-
}
902-
903-
if let Ok(val) = value.extract::<bool>(py) {
904-
return Ok(ScalarValue::Boolean(Some(val)));
905-
}
906-
907-
if let Ok(val) = value.extract::<String>(py) {
908-
return Ok(ScalarValue::Utf8(Some(val)));
909-
}
910-
911-
if let Some(scalar) = try_extract_datetime(value, py) {
912-
return Ok(scalar);
913-
}
914-
915-
if let Some(scalar) = try_extract_date(value, py) {
916-
return Ok(scalar);
917-
}
918-
919-
// Fallback to string representation
920-
try_convert_to_string(value, py)
921-
}
922-
923-
/// Try to extract numeric types from a Python object
924-
fn try_extract_numeric(value: &PyObject, py: Python) -> Option<ScalarValue> {
925-
// Integer types
926-
if let Ok(val) = value.extract::<i64>(py) {
927-
return Some(ScalarValue::Int64(Some(val)));
928-
} else if let Ok(val) = value.extract::<i32>(py) {
929-
return Some(ScalarValue::Int32(Some(val)));
930-
} else if let Ok(val) = value.extract::<i16>(py) {
931-
return Some(ScalarValue::Int16(Some(val)));
932-
} else if let Ok(val) = value.extract::<i8>(py) {
933-
return Some(ScalarValue::Int8(Some(val)));
934-
}
935-
936-
// Unsigned integer types
937-
if let Ok(val) = value.extract::<u64>(py) {
938-
return Some(ScalarValue::UInt64(Some(val)));
939-
} else if let Ok(val) = value.extract::<u32>(py) {
940-
return Some(ScalarValue::UInt32(Some(val)));
941-
} else if let Ok(val) = value.extract::<u16>(py) {
942-
return Some(ScalarValue::UInt16(Some(val)));
943-
} else if let Ok(val) = value.extract::<u8>(py) {
944-
return Some(ScalarValue::UInt8(Some(val)));
945-
}
946-
947-
// Float types
948-
if let Ok(val) = value.extract::<f64>(py) {
949-
return Some(ScalarValue::Float64(Some(val)));
950-
} else if let Ok(val) = value.extract::<f32>(py) {
951-
return Some(ScalarValue::Float32(Some(val)));
952-
}
953-
954-
None
955-
}
956-
957-
/// Try to extract datetime from a Python object
958-
fn try_extract_datetime(value: &PyObject, py: Python) -> Option<ScalarValue> {
959-
let datetime_cls = py
960-
.import("datetime")
961-
.and_then(|m| m.getattr("datetime"))
962-
.ok()?;
963-
964-
let any: PyAny = value.extract(py).ok()?;
965-
966-
if any.is_instance(datetime_cls).ok()? {
967-
let dt = any.cast_as::<pyo3::types::PyDateTime>(py).ok()?;
968-
969-
// Extract datetime components
970-
let year = dt.get_year() as i32;
971-
let month = dt.get_month() as u8;
972-
let day = dt.get_day() as u8;
973-
let hour = dt.get_hour() as u8;
974-
let minute = dt.get_minute() as u8;
975-
let second = dt.get_second() as u8;
976-
let micro = dt.get_microsecond() as u32;
977-
978-
// Convert to timestamp
979-
let ts = date_to_timestamp(year, month, day, hour, minute, second, micro * 1000).ok()?;
980-
return Some(ScalarValue::TimestampNanosecond(Some(ts), None));
981-
}
982-
983-
None
984-
}
985-
986-
/// Try to extract date from a Python object
987-
fn try_extract_date(value: &PyObject, py: Python) -> Option<ScalarValue> {
988-
// Import datetime module once
989-
let datetime_mod = py.import("datetime").ok()?;
990-
let date_cls = datetime_mod.getattr("date").ok()?;
991-
let datetime_cls = datetime_mod.getattr("datetime").ok()?;
992-
993-
// convert your PyObject into a &PyAny
994-
let any: PyAny = value.extract(py).ok()?;
995-
996-
// Is it a date?
997-
if any.is_instance(date_cls).ok()? {
998-
// But not a datetime (we assume you handled datetimes elsewhere)
999-
if any.is_instance(datetime_cls).ok()? {
1000-
return None;
1001-
}
1002-
1003-
// Downcast into the PyDate type
1004-
let dt: &PyDate = any.downcast().ok()?;
1005-
1006-
// Pull out year/month/day
1007-
let year = dt.get_year() as i32;
1008-
let month = dt.get_month() as u8;
1009-
let day = dt.get_day() as u8;
1010-
1011-
// Convert to your internal Date32
1012-
let days = date_to_days_since_epoch(year, month, day).ok()?;
1013-
return Some(ScalarValue::Date32(Some(days)));
1014-
}
1015-
1016-
None
1017-
}
1018-
1019-
/// Try to convert a Python object to string
1020-
fn try_convert_to_string(value: &PyObject, py: Python) -> PyDataFusionResult<ScalarValue> {
1021-
// Try to convert arbitrary Python object to string by using str()
1022-
let str_result = value.call_method0(py, "str")?.extract::<String>(py);
1023-
match str_result {
1024-
Ok(string_value) => Ok(ScalarValue::Utf8(Some(string_value))),
1025-
Err(_) => {
1026-
let msg = "Could not convert Python object to string";
1027-
Err(PyDataFusionError::Common(msg.to_string()))
1028-
}
1029-
}
1030-
}
1031-
1032-
/// Helper function to convert date components to timestamp in nanoseconds
1033-
fn date_to_timestamp(
1034-
year: i32,
1035-
month: u8,
1036-
day: u8,
1037-
hour: u8,
1038-
minute: u8,
1039-
second: u8,
1040-
nano: u32,
1041-
) -> Result<i64, String> {
1042-
// This is a simplified implementation
1043-
// For production code, consider using a more complete date/time library
1044-
1045-
// Number of days in each month (non-leap year)
1046-
const DAYS_IN_MONTH: [u8; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
1047-
1048-
// Validate inputs
1049-
if month < 1 || month > 12 {
1050-
return Err("Invalid month".to_string());
1051-
}
1052-
1053-
let max_days = if month == 2 && is_leap_year(year) {
1054-
29
1055-
} else {
1056-
DAYS_IN_MONTH[(month - 1) as usize]
1057-
};
1058-
1059-
if day < 1 || day > max_days {
1060-
return Err("Invalid day".to_string());
1061-
}
1062-
1063-
if hour > 23 || minute > 59 || second > 59 {
1064-
return Err("Invalid time".to_string());
1065-
}
1066-
1067-
// Calculate days since epoch
1068-
let days = date_to_days_since_epoch(year, month, day)?;
1069-
1070-
// Convert to seconds and add time components
1071-
let seconds =
1072-
days as i64 * 86400 + (hour as i64) * 3600 + (minute as i64) * 60 + (second as i64);
1073-
1074-
// Convert to nanoseconds
1075-
Ok(seconds * 1_000_000_000 + nano as i64)
1076-
}
1077-
1078-
/// Report whether `year` is a leap year under the Gregorian rules:
/// divisible by 400, or divisible by 4 but not by 100.
fn is_leap_year(year: i32) -> bool {
    match (year % 4, year % 100, year % 400) {
        (_, _, 0) => true,
        (_, 0, _) => false,
        (0, _, _) => true,
        _ => false,
    }
}
1082-
1083-
/// Convert a proleptic-Gregorian calendar date to the number of days since
/// the Unix epoch (1970-01-01). Dates before the epoch yield negative counts.
///
/// `month` and `day` are 1-based. `day` is only range-checked against
/// `1..=31`, not against the length of the given month; a day past the end of
/// the month rolls over into the following month.
///
/// # Errors
///
/// Returns `Err` with `"Invalid month"` when `month` is outside `1..=12`,
/// `"Invalid day"` when `day` is outside `1..=31`, and `"Date out of range"`
/// when the day count does not fit in an `i32`.
fn date_to_days_since_epoch(year: i32, month: u8, day: u8) -> Result<i32, String> {
    if !(1..=12).contains(&month) {
        return Err("Invalid month".to_string());
    }
    if !(1..=31).contains(&day) {
        return Err("Invalid day".to_string());
    }

    let month = i64::from(month);
    let day = i64::from(day);

    // Closed-form civil-date conversion: treat the year as starting on
    // 1 March so that the leap day is the last day of the shifted year, then
    // count whole 400-year eras (146_097 days each) plus the offset within
    // the era. This is constant-time and valid on both sides of 1970.
    let shifted_year = i64::from(year) - i64::from(month <= 2);
    let era = shifted_year.div_euclid(400);
    let year_of_era = shifted_year.rem_euclid(400);
    let month_from_march = (month + 9) % 12;
    let day_of_year = (153 * month_from_march + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;

    // 719_468 is the number of days from 0000-03-01 to 1970-01-01.
    let days = era * 146_097 + day_of_era - 719_468;

    if days < i64::from(i32::MIN) || days > i64::from(i32::MAX) {
        return Err("Date out of range".to_string());
    }
    Ok(days as i32)
}

0 commit comments

Comments
 (0)