diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index 066151babc91b..37f8f8a525407 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -46,4 +46,6 @@ jobs: with: tool: cargo-audit - name: Run audit check + # Note: you can ignore specific RUSTSEC issues using the `--ignore` flag, for example: + # run: cargo audit --ignore RUSTSEC-2026-0001 run: cargo audit diff --git a/Cargo.lock b/Cargo.lock index 8dcfbc65c21b0..e98413f01d996 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -56,17 +56,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" -[[package]] -name = "ahash" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" -dependencies = [ - "getrandom 0.2.16", - "once_cell", - "version_check", -] - [[package]] name = "ahash" version = "0.8.12" @@ -275,7 +264,7 @@ version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow-buffer", "arrow-data", "arrow-schema", @@ -462,7 +451,7 @@ version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -1058,18 +1047,6 @@ version = "2.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" -[[package]] -name = "bitvec" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", 
"wyz", -] - [[package]] name = "blake2" version = "0.10.6" @@ -1203,29 +1180,6 @@ dependencies = [ "syn 2.0.113", ] -[[package]] -name = "borsh" -version = "1.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad8646f98db542e39fc66e68a20b2144f6a732636df7c2354e74645faaa433ce" -dependencies = [ - "borsh-derive", - "cfg_aliases", -] - -[[package]] -name = "borsh-derive" -version = "1.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd1d3c0c2f5833f22386f252fe8ed005c7f59fdcddeef025c01b4c3b9fd9ac3" -dependencies = [ - "once_cell", - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.113", -] - [[package]] name = "brotli" version = "8.0.2" @@ -1263,28 +1217,6 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" -[[package]] -name = "bytecheck" -version = "0.6.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" -dependencies = [ - "bytecheck_derive", - "ptr_meta", - "simdutf8", -] - -[[package]] -name = "bytecheck_derive" -version = "0.6.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "byteorder" version = "1.5.0" @@ -1988,7 +1920,7 @@ dependencies = [ name = "datafusion-common" version = "51.0.0" dependencies = [ - "ahash 0.8.12", + "ahash", "apache-avro", "arrow", "arrow-ipc", @@ -2337,7 +2269,7 @@ dependencies = [ name = "datafusion-functions-aggregate" version = "51.0.0" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "criterion", "datafusion-common", @@ -2358,7 +2290,7 @@ dependencies = [ name = "datafusion-functions-aggregate-common" version = "51.0.0" dependencies = [ - "ahash 0.8.12", + 
"ahash", "arrow", "criterion", "datafusion-common", @@ -2468,7 +2400,7 @@ dependencies = [ name = "datafusion-physical-expr" version = "51.0.0" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "criterion", "datafusion-common", @@ -2508,7 +2440,7 @@ dependencies = [ name = "datafusion-physical-expr-common" version = "51.0.0" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "chrono", "datafusion-common", @@ -2543,7 +2475,7 @@ dependencies = [ name = "datafusion-physical-plan" version = "51.0.0" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "arrow-ord", "arrow-schema", @@ -2722,10 +2654,8 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "postgres-protocol", "postgres-types", "regex", - "rust_decimal", "sqllogictest", "sqlparser", "tempfile", @@ -3143,12 +3073,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - [[package]] name = "futures" version = "0.3.31" @@ -3361,9 +3285,6 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.8", -] [[package]] name = "hashbrown" @@ -4463,7 +4384,7 @@ version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow-array", "arrow-buffer", "arrow-cast", @@ -4884,26 +4805,6 @@ dependencies = [ "cc", ] -[[package]] -name = "ptr_meta" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" -dependencies = [ - "ptr_meta_derive", -] - -[[package]] -name = "ptr_meta_derive" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "quad-rand" version = "0.2.3" @@ -4990,12 +4891,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - [[package]] name = "radix_trie" version = "0.2.1" @@ -5206,15 +5101,6 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" -[[package]] -name = "rend" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" -dependencies = [ - "bytecheck", -] - [[package]] name = "repr_offset" version = "0.2.2" @@ -5280,35 +5166,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rkyv" -version = "0.7.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" -dependencies = [ - "bitvec", - "bytecheck", - "bytes", - "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", - "seahash", - "tinyvec", - "uuid", -] - -[[package]] -name = "rkyv_derive" -version = "0.7.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" -dependencies = [ - "proc-macro2", - "quote", - "syn 
1.0.109", -] - [[package]] name = "rstest" version = "0.26.1" @@ -5349,23 +5206,6 @@ dependencies = [ "syn 2.0.113", ] -[[package]] -name = "rust_decimal" -version = "1.38.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8975fc98059f365204d635119cf9c5a60ae67b841ed49b5422a9a7e56cdfac0" -dependencies = [ - "arrayvec", - "borsh", - "bytes", - "num-traits", - "postgres-types", - "rand 0.8.5", - "rkyv", - "serde", - "serde_json", -] - [[package]] name = "rustc-hash" version = "2.1.1" @@ -5559,12 +5399,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "seahash" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" - [[package]] name = "security-framework" version = "3.5.0" @@ -6105,12 +5939,6 @@ dependencies = [ "windows", ] -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "tempfile" version = "3.23.0" @@ -7320,15 +7148,6 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] - [[package]] name = "xattr" version = "1.6.1" diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index a26a1d44225fe..dcec858a2475a 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -55,9 +55,7 @@ indicatif = "0.18" itertools = { workspace = true } log = { 
/// Renders a Postgres `NUMERIC` value, already parsed into a `BigDecimal`, as a
/// normalized string by delegating to `big_decimal_to_str` without requesting
/// any rounding.
#[cfg(feature = "postgres")]
pub(crate) fn decimal_to_str(value: BigDecimal) -> String {
    // `None`: no decimal-place limit is passed on, so the value is not rounded here.
    let round_digits = None;
    big_decimal_to_str(value, round_digits)
}
use async_trait::async_trait; +use bigdecimal::BigDecimal; use bytes::Bytes; use datafusion::common::runtime::SpawnedTask; use futures::{SinkExt, StreamExt}; @@ -32,12 +33,8 @@ use crate::engines::output::{DFColumnType, DFOutput}; use chrono::{NaiveDate, NaiveDateTime, NaiveTime}; use indicatif::ProgressBar; use postgres_types::Type; -use rust_decimal::Decimal; use tokio::time::Instant; -use tokio_postgres::{Column, Row}; -use types::PgRegtype; - -mod types; +use tokio_postgres::{SimpleQueryMessage, SimpleQueryRow}; // default connect string, can be overridden by the `PG_URL` environment variable const PG_URI: &str = "postgresql://postgres@127.0.0.1/test"; @@ -299,8 +296,20 @@ impl sqllogictest::AsyncDB for Postgres { self.pb.inc(1); return Ok(DBOutput::StatementComplete(0)); } + // Use a prepared statement to get the output column types + let statement = self.get_client().prepare(sql).await?; + let types: Vec = statement + .columns() + .iter() + .map(|c| c.type_().clone()) + .collect(); + + // Run the actual query using the "simple query" protocol that returns all + // rows as text. Doing this avoids having to convert values from the binary + // format to strings, which is somewhat tricky for numeric types. + // See https://github.com/apache/datafusion/pull/19666#discussion_r2668090587 let start = Instant::now(); - let rows = self.get_client().query(sql, &[]).await?; + let messages = self.get_client().simple_query(sql).await?; let duration = start.elapsed(); if duration.gt(&Duration::from_millis(500)) { @@ -309,30 +318,16 @@ impl sqllogictest::AsyncDB for Postgres { self.pb.inc(1); - let types: Vec = if rows.is_empty() { - self.get_client() - .prepare(sql) - .await? 
- .columns() - .iter() - .map(|c| c.type_().clone()) - .collect() - } else { - rows[0] - .columns() - .iter() - .map(|c| c.type_().clone()) - .collect() - }; - self.currently_executing_sql_tracker.remove_sql(tracked_sql); + let rows = convert_rows(&types, &messages); + if rows.is_empty() && types.is_empty() { Ok(DBOutput::StatementComplete(0)) } else { Ok(DBOutput::Rows { types: convert_types(types), - rows: convert_rows(&rows), + rows, }) } } @@ -351,58 +346,68 @@ impl sqllogictest::AsyncDB for Postgres { } } -fn convert_rows(rows: &[Row]) -> Vec> { - rows.iter() +fn convert_rows(types: &[Type], messages: &[SimpleQueryMessage]) -> Vec> { + messages + .iter() + .filter_map(|message| match message { + SimpleQueryMessage::Row(row) => Some(row), + _ => None, + }) .map(|row| { - row.columns() + types .iter() .enumerate() - .map(|(idx, column)| cell_to_string(row, column, idx)) + .map(|(idx, column_type)| cell_to_string(row, column_type, idx)) .collect::>() }) .collect::>() } -macro_rules! make_string { - ($row:ident, $idx:ident, $t:ty) => {{ - let value: Option<$t> = $row.get($idx); - match value { - Some(value) => value.to_string(), - None => NULL_STR.to_string(), +fn cell_to_string(row: &SimpleQueryRow, column_type: &Type, idx: usize) -> String { + // simple_query returns text values, so we parse by Postgres type to keep + // normalization aligned with the DataFusion engine output. 
+ let value = row.get(idx); + match (column_type, value) { + (_, None) => NULL_STR.to_string(), + (&Type::CHAR, Some(value)) => value + .as_bytes() + .first() + .map(|byte| (*byte as i8).to_string()) + .unwrap_or_else(|| NULL_STR.to_string()), + (&Type::INT2, Some(value)) => value.parse::().unwrap().to_string(), + (&Type::INT4, Some(value)) => value.parse::().unwrap().to_string(), + (&Type::INT8, Some(value)) => value.parse::().unwrap().to_string(), + (&Type::NUMERIC, Some(value)) => { + decimal_to_str(BigDecimal::from_str(value).unwrap()) } - }}; - ($row:ident, $idx:ident, $t:ty, $convert:ident) => {{ - let value: Option<$t> = $row.get($idx); - match value { - Some(value) => $convert(value).to_string(), - None => NULL_STR.to_string(), + // Parse date/time strings explicitly to avoid locale-specific formatting. + (&Type::DATE, Some(value)) => NaiveDate::parse_from_str(value, "%Y-%m-%d") + .unwrap() + .to_string(), + (&Type::TIME, Some(value)) => NaiveTime::parse_from_str(value, "%H:%M:%S%.f") + .unwrap() + .to_string(), + (&Type::TIMESTAMP, Some(value)) => { + let parsed = NaiveDateTime::parse_from_str(value, "%Y-%m-%d %H:%M:%S%.f") + .or_else(|_| NaiveDateTime::parse_from_str(value, "%Y-%m-%dT%H:%M:%S%.f")) + .unwrap(); + format!("{parsed:?}") } - }}; -} - -fn cell_to_string(row: &Row, column: &Column, idx: usize) -> String { - match column.type_().clone() { - Type::CHAR => make_string!(row, idx, i8), - Type::INT2 => make_string!(row, idx, i16), - Type::INT4 => make_string!(row, idx, i32), - Type::INT8 => make_string!(row, idx, i64), - Type::NUMERIC => make_string!(row, idx, Decimal, decimal_to_str), - Type::DATE => make_string!(row, idx, NaiveDate), - Type::TIME => make_string!(row, idx, NaiveTime), - Type::TIMESTAMP => { - let value: Option = row.get(idx); - value - .map(|d| format!("{d:?}")) - .unwrap_or_else(|| "NULL".to_string()) + (&Type::BOOL, Some(value)) => { + let parsed = match value { + "t" | "true" | "TRUE" => true, + "f" | "false" | "FALSE" => false, 
+ _ => panic!("Unsupported boolean value: {value}"), + }; + bool_to_str(parsed) } - Type::BOOL => make_string!(row, idx, bool, bool_to_str), - Type::BPCHAR | Type::VARCHAR | Type::TEXT => { - make_string!(row, idx, &str, varchar_to_str) + (&Type::BPCHAR | &Type::VARCHAR | &Type::TEXT, Some(value)) => { + varchar_to_str(value) } - Type::FLOAT4 => make_string!(row, idx, f32, f32_to_str), - Type::FLOAT8 => make_string!(row, idx, f64, f64_to_str), - Type::REGTYPE => make_string!(row, idx, PgRegtype), - _ => unimplemented!("Unsupported type: {}", column.type_().name()), + (&Type::FLOAT4, Some(value)) => f32_to_str(value.parse::().unwrap()), + (&Type::FLOAT8, Some(value)) => f64_to_str(value.parse::().unwrap()), + (&Type::REGTYPE, Some(value)) => value.to_string(), + _ => unimplemented!("Unsupported type: {}", column_type.name()), } } diff --git a/datafusion/sqllogictest/src/engines/postgres_engine/types.rs b/datafusion/sqllogictest/src/engines/postgres_engine/types.rs deleted file mode 100644 index 510462befb086..0000000000000 --- a/datafusion/sqllogictest/src/engines/postgres_engine/types.rs +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use postgres_types::Type; -use std::fmt::Display; -use tokio_postgres::types::FromSql; - -pub struct PgRegtype { - value: String, -} - -impl<'a> FromSql<'a> for PgRegtype { - fn from_sql( - _: &Type, - buf: &'a [u8], - ) -> Result> { - let oid = postgres_protocol::types::oid_from_sql(buf)?; - let value = Type::from_oid(oid).ok_or("bad type")?.to_string(); - Ok(PgRegtype { value }) - } - - fn accepts(ty: &Type) -> bool { - matches!(*ty, Type::REGTYPE) - } -} - -impl Display for PgRegtype { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.value) - } -}