diff --git a/Cargo.lock b/Cargo.lock index 5dc1f5e1f9f67..011da9ffd9c8a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -232,9 +232,8 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-arith", "arrow-array", @@ -255,9 +254,8 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-buffer", @@ -269,9 +267,8 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "ahash", "arrow-buffer", @@ -288,9 +285,8 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "bytes", "half", @@ -300,9 +296,8 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-buffer", @@ -322,9 +317,8 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-cast", @@ -337,9 +331,8 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-buffer", "arrow-schema", @@ -350,9 +343,8 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5f57c3d39d1b1b7c1376a772ea86a131e7da310aed54ebea9363124bb885e3" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-arith", "arrow-array", @@ -378,9 +370,8 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,9 +385,8 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-buffer", @@ -418,9 +408,8 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-buffer", @@ -431,9 +420,8 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-buffer", @@ -444,9 +432,8 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "bitflags", "serde", @@ -456,9 +443,8 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "ahash", "arrow-array", @@ -470,9 +456,8 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "arrow-array", "arrow-buffer", @@ -3503,7 +3488,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.62.2", + "windows-core", ] [[package]] @@ -4323,9 +4308,8 @@ dependencies = [ [[package]] name = "parquet" -version = "57.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +version = "57.2.0" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb%2Fprepare_57.2.0#a3222c932a491f63e3f175bee8b51ecf548762a3" dependencies = [ "ahash", "arrow-array", @@ -6786,7 +6770,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ "windows-collections", - "windows-core 0.61.2", + "windows-core", "windows-future", "windows-link 0.1.3", "windows-numerics", @@ -6798,7 +6782,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ - "windows-core 0.61.2", + "windows-core", ] [[package]] @@ -6810,21 +6794,8 @@ dependencies = [ "windows-implement", "windows-interface", "windows-link 0.1.3", - "windows-result 0.3.4", - "windows-strings 0.4.2", -] - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link 0.2.1", - "windows-result 0.4.1", - "windows-strings 0.5.1", + "windows-result", + "windows-strings", ] [[package]] @@ -6833,7 +6804,7 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ - "windows-core 0.61.2", + "windows-core", "windows-link 0.1.3", "windows-threading", ] @@ -6878,7 +6849,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ - "windows-core 0.61.2", + "windows-core", "windows-link 0.1.3", ] @@ -6891,15 +6862,6 @@ dependencies = [ "windows-link 0.1.3", ] -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link 0.2.1", -] - [[package]] name = "windows-strings" version = "0.4.2" @@ -6909,15 +6871,6 @@ dependencies = [ "windows-link 0.1.3", ] -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link 0.2.1", -] - [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index b9d8b1a69ef61..25bf5c037404e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -91,19 +91,19 @@ ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } apache-avro = { version = "0.21", default-features = false } -arrow = { version = "57.1.0", features = [ +arrow = { version = "57.2.0", features = [ "prettyprint", "chrono-tz", ] } -arrow-buffer = { version = "57.1.0", default-features = false } -arrow-flight = { version = "57.1.0", features = [ +arrow-buffer = { version = "57.2.0", default-features = false } +arrow-flight = { version = "57.2.0", features = [ "flight-sql-experimental", ] } -arrow-ipc = { version = "57.1.0", default-features = false, features = [ +arrow-ipc = { version = "57.2.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "57.1.0", default-features = false } -arrow-schema = { version = "57.1.0", default-features = false } +arrow-ord = { version = "57.2.0", default-features = false } +arrow-schema = { version = "57.2.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" bytes = "1.11" @@ -166,7 +166,7 @@ log = "^0.4" num-traits = { version = "0.2" } object_store = { version = "0.12.4", default-features = false } parking_lot = "0.12" -parquet = { version = "57.1.0", default-features = false, features = [ +parquet = { version = "57.2.0", default-features = false, features = [ "arrow", "async", "object_store", @@ -271,3 +271,32 @@ incremental = false inherits = "release" debug = true strip = false + +## Temporary arrow-rs patch until 57.2.0 is released + +[patch.crates-io] +arrow = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-array = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-data = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-ipc = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-select = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-string = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } +parquet = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/prepare_57.2.0" } + +#arrow = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow" } +#arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-array" } +#arrow-buffer = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-buffer" } +#arrow-cast = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-cast" } +#arrow-data = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-data" } +#arrow-ipc = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-ipc" } +#arrow-schema = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-schema" } +#arrow-select = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-select" } +#arrow-string = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-string" } +#arrow-ord = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-ord" } +#arrow-flight = { path= "/Users/andrewlamb/Software/arrow-rs2/arrow-flight" } +#parquet = { path= "/Users/andrewlamb/Software/arrow-rs2/parquet" } diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index e4e048ad3c0d8..eda4952cf590b 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -8868,7 +8868,7 @@ mod tests { .unwrap(), ScalarValue::try_new_null(&DataType::Map(map_field_ref, false)).unwrap(), ScalarValue::try_new_null(&DataType::Union( - UnionFields::new(vec![42], vec![field_ref]), + UnionFields::try_new(vec![42], vec![field_ref]).unwrap(), UnionMode::Dense, )) .unwrap(), @@ -8971,13 +8971,14 @@ mod tests { } // Test union type - let union_fields = UnionFields::new( + let union_fields = UnionFields::try_new( vec![0, 1], vec![ Field::new("i32", DataType::Int32, false), Field::new("f64", DataType::Float64, false), ], - ); + ) + .unwrap(); let union_result = ScalarValue::new_default(&DataType::Union( union_fields.clone(), UnionMode::Sparse, diff --git a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs index 0e8f2a4d56088..cdce091430959 100644 --- a/datafusion/datasource-avro/src/avro_to_arrow/schema.rs +++ b/datafusion/datasource-avro/src/avro_to_arrow/schema.rs @@ -118,7 +118,10 @@ fn schema_to_field_with_props( .map(|s| schema_to_field_with_props(s, None, has_nullable, None)) .collect::>>()?; let type_ids = 0_i8..fields.len() as i8; - DataType::Union(UnionFields::new(type_ids, fields), UnionMode::Dense) + DataType::Union( + UnionFields::try_new(type_ids, fields).unwrap(), + UnionMode::Dense, + ) } } AvroSchema::Record(RecordSchema { fields, .. }) => { diff --git a/datafusion/functions/src/core/union_extract.rs b/datafusion/functions/src/core/union_extract.rs index 56d4f23cc4e2e..8d915fb2e2c07 100644 --- a/datafusion/functions/src/core/union_extract.rs +++ b/datafusion/functions/src/core/union_extract.rs @@ -189,13 +189,14 @@ mod tests { fn test_scalar_value() -> Result<()> { let fun = UnionExtractFun::new(); - let fields = UnionFields::new( + let fields = UnionFields::try_new( vec![1, 3], vec![ Field::new("str", DataType::Utf8, false), Field::new("int", DataType::Int32, false), ], - ); + ) + .unwrap(); let args = vec![ ColumnarValue::Scalar(ScalarValue::Union( diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 674fe6692adf5..42adb84397c47 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -1557,13 +1557,14 @@ mod tests { #[test] fn test_equivalence_properties_union_type() -> Result<()> { let union_type = DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![0, 1], vec![ Field::new("f1", DataType::Int32, true), Field::new("f2", DataType::Utf8, true), ], - ), + ) + .unwrap(), UnionMode::Sparse, ); diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index e8e71c3884586..f0a8a4afbc6f5 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -310,7 +310,11 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType { false => union.type_ids.iter().map(|i| *i as i8).collect(), }; - DataType::Union(UnionFields::new(type_ids, union_fields), union_mode) + let union_fields = + UnionFields::try_new(type_ids, union_fields).map_err(|e| { + DataFusionError::from(e).context("Deserializing Union DataType") + })?; + DataType::Union(union_fields, union_mode) } arrow_type::ArrowTypeEnum::Dictionary(dict) => { let key_datatype = dict.as_ref().key.as_deref().required("key")?; @@ -602,7 +606,9 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { .collect::>>(); let fields = fields.ok_or_else(|| Error::required("UnionField"))?; let fields = parse_proto_fields_to_fields(&fields)?; - let fields = UnionFields::new(ids, fields); + let union_fields = UnionFields::try_new(ids, fields).map_err(|e| { + DataFusionError::from(e).context("Deserializing Union ScalarValue") + })?; let v_id = val.value_id as i8; let val = match &val.value { None => None, @@ -614,7 +620,7 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { Some((v_id, Box::new(val))) } }; - Self::Union(val, fields, mode) + Self::Union(val, union_fields, mode) } Value::FixedSizeBinaryValue(v) => { Self::FixedSizeBinary(v.length, Some(v.clone().values)) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index bcfda648b53e5..b9af9fc9352b2 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1780,19 +1780,20 @@ fn round_trip_datatype() { ), ])), DataType::Union( - UnionFields::new( + UnionFields::try_new( vec![7, 5, 3], vec![ Field::new("nullable", DataType::Boolean, false), Field::new("name", DataType::Utf8, false), Field::new("datatype", DataType::Binary, false), ], - ), + ) + .unwrap(), UnionMode::Sparse, ), DataType::Union( - UnionFields::new( - vec![5, 8, 1], + UnionFields::try_new( + vec![5, 8, 1, 100], vec![ Field::new("nullable", DataType::Boolean, false), Field::new("name", DataType::Utf8, false), @@ -1807,7 +1808,8 @@ fn round_trip_datatype() { true, ), ], - ), + ) + .unwrap(), UnionMode::Dense, ), DataType::Dictionary( diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index 9ec085b41eec0..a69bdac663002 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -436,14 +436,15 @@ fn create_example_udf() -> ScalarUDF { fn register_union_table(ctx: &SessionContext) { let union = UnionArray::try_new( - UnionFields::new( + UnionFields::try_new( // typeids: 3 for int, 1 for string vec![3, 1], vec![ Field::new("int", DataType::Int32, false), Field::new("string", DataType::Utf8, false), ], - ), + ) + .unwrap(), ScalarBuffer::from(vec![3, 1, 3]), None, vec![ diff --git a/datafusion/sqllogictest/test_files/case.slt b/datafusion/sqllogictest/test_files/case.slt index 074d216ac7524..481dde5be9f5c 100644 --- a/datafusion/sqllogictest/test_files/case.slt +++ b/datafusion/sqllogictest/test_files/case.slt @@ -384,8 +384,7 @@ SELECT column2, column3, column4 FROM t; {foo: a, xxx: b} {xxx: c, foo: d} {xxx: e} # coerce structs with different field orders, -# (note the *value*s are from column2 but the field name is 'xxx', as the coerced -# type takes the field name from the last argument (column3) +# should keep the same field values query ? SELECT case @@ -394,7 +393,7 @@ SELECT end FROM t; ---- -{xxx: a, foo: b} +{xxx: b, foo: a} # coerce structs with different field orders query ? diff --git a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt index 6fbeb11fb9a36..df5588c75837d 100644 --- a/datafusion/sqllogictest/test_files/spark/hash/crc32.slt +++ b/datafusion/sqllogictest/test_files/spark/hash/crc32.slt @@ -81,7 +81,7 @@ SELECT crc32(arrow_cast('Spark', 'BinaryView')); ---- 1557323817 -# Upstream arrow-rs issue: https://github.com/apache/arrow-rs/issues/8841 -# This should succeed after we receive the fix -query error Arrow error: Compute error: Internal Error: Cannot cast BinaryView to BinaryArray of expected type +query I select crc32(arrow_cast(null, 'Dictionary(Int32, Utf8)')) +---- +NULL diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index d985af1104da3..a91a5e7f870a9 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -492,9 +492,18 @@ Struct("r": Utf8, "c": Float64) statement ok drop table t; -query error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'a' to value of Float64 type +statement ok create table t as values({r: 'a', c: 1}), ({c: 2.3, r: 'b'}); +query ? +select * from t; +---- +{c: 1.0, r: a} +{c: 2.3, r: b} + +statement ok +drop table t; + ################################## ## Test Coalesce with Struct ################################## @@ -560,10 +569,18 @@ create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as valu (row('purple', 1), row('green', 2.3)); # out of order struct literal -# TODO: This query should not fail -statement error DataFusion error: Optimizer rule 'simplify_expressions' failed[\s\S]*Arrow error: Cast error: Cannot cast string 'b' to value of Int32 type +statement ok create table t(a struct(r varchar, c int)) as values ({r: 'a', c: 1}), ({c: 2, r: 'b'}); +query ? +select * from t; +---- +{r: a, c: 1} +{r: b, c: 2} + +statement ok +drop table t; + ################################## ## Test Array of Struct ################################## @@ -573,9 +590,11 @@ select [{r: 'a', c: 1}, {r: 'b', c: 2}]; ---- [{r: a, c: 1}, {r: b, c: 2}] -# Can't create a list of struct with different field types -query error +# Create a list of struct with different field types +query ? select [{r: 'a', c: 1}, {c: 2, r: 'b'}]; +---- +[{c: 1, r: a}, {c: 2, r: b}] statement ok create table t(a struct(r varchar, c int), b struct(r varchar, c float)) as values (row('a', 1), row('b', 2.3)); @@ -592,9 +611,11 @@ drop table t; statement ok create table t(a struct(r varchar, c int), b struct(c float, r varchar)) as values (row('a', 1), row(2.3, 'b')); -# create array with different struct type is not valid -query error +# create array with different struct type should be cast +query T select arrow_typeof([a, b]) from t; +---- +List(Struct("c": Float32, "r": Utf8View)) statement ok drop table t; @@ -602,13 +623,13 @@ drop table t; statement ok create table t(a struct(r varchar, c int, g float), b struct(r varchar, c float, g int)) as values (row('a', 1, 2.3), row('b', 2.3, 2)); -# type of each column should not coerced but perserve as it is +# type of each column should not coerced but preserve as it is query T select arrow_typeof(a) from t; ---- Struct("r": Utf8View, "c": Int32, "g": Float32) -# type of each column should not coerced but perserve as it is +# type of each column should not coerced but preserve as it is query T select arrow_typeof(b) from t; ----