diff --git a/common/src/main/scala/org/apache/comet/CometConf.scala b/common/src/main/scala/org/apache/comet/CometConf.scala index 480eafdcb7..5ee777f3d7 100644 --- a/common/src/main/scala/org/apache/comet/CometConf.scala +++ b/common/src/main/scala/org/apache/comet/CometConf.scala @@ -148,6 +148,17 @@ object CometConf extends ShimCometConf { .booleanConf .createWithDefault(false) + val COMET_ICEBERG_DATA_FILE_CONCURRENCY_LIMIT: ConfigEntry[Int] = + conf("spark.comet.scan.icebergNative.dataFileConcurrencyLimit") + .category(CATEGORY_SCAN) + .doc( + "The number of Iceberg data files to read concurrently within a single task. " + + "Higher values improve throughput for tables with many small files by overlapping " + + "I/O latency, but increase memory usage. Values between 2 and 8 are suggested.") + .intConf + .checkValue(v => v > 0, "Data file concurrency limit must be positive") + .createWithDefault(1) + val COMET_CSV_V2_NATIVE_ENABLED: ConfigEntry[Boolean] = conf("spark.comet.scan.csv.v2.enabled") .category(CATEGORY_TESTING) diff --git a/native/Cargo.lock b/native/Cargo.lock index 0977bb96dc..32f6b6e915 100644 --- a/native/Cargo.lock +++ b/native/Cargo.lock @@ -17,17 +17,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" -[[package]] -name = "ahash" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" -dependencies = [ - "getrandom 0.2.17", - "once_cell", - "version_check", -] - [[package]] name = "ahash" version = "0.8.12" @@ -104,9 +93,9 @@ checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" [[package]] name = "apache-avro" @@ -135,9 +124,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.0" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" dependencies = [ "rustversion", ] @@ -201,7 +190,7 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c8955af33b25f3b175ee10af580577280b4bd01f7e823d94c7cdef7cf8c9aef" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow-buffer", "arrow-data", "arrow-schema", @@ -347,7 +336,7 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c872d36b7bf2a6a6a2b40de9156265f0242910791db366a2c17476ba8330d68" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "serde_core", "serde_json", ] @@ -358,7 +347,7 @@ version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68bf3e3efbd1278f770d67e5dc410257300b161b93baedb3aae836144edcaf4b" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow-array", "arrow-buffer", "arrow-data", @@ -391,9 +380,9 @@ checksum = "b0f477b951e452a0b6b4a10b53ccd569042d1d01729b519e02074a9c0958a063" [[package]] name = "assertables" -version = "9.8.4" +version = "9.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd1f7f2f608b9a888a851f234086946c2ca1dfeadf1431c5082fee0942eeb6" +checksum = "e58d4dd3e6f507b037ac5878d4502a7b0c04ab5247521a7679944e4dd2f9d15d" [[package]] name = "async-channel" @@ -437,9 +426,9 @@ dependencies = [ [[package]] name = "async-executor" -version = "1.13.3" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" +checksum = "c96bf972d85afc50bf5ab8fe2d54d1586b4e0b46c97c50a0c9e71e2f7bcd812a" dependencies = [ "async-task", "concurrent-queue", @@ -533,7 +522,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -589,9 +578,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.12" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e26bbf46abc608f2dc61fd6cb3b7b0665497cc259a21520151ed98f8b37d2c79" +checksum = "6d203b0bf2626dcba8665f5cd0871d7c2c0930223d6b6be9097592fea21242d0" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -601,9 +590,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.15.3" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e84ce723ab67259cfeb9877c6a639ee9eb7a27b28123abd71db7f0d5d0cc9d86" +checksum = "d9a7b350e3bb1767102698302bc37256cbd48422809984b98d292c40e2579aa9" dependencies = [ "aws-lc-sys", "zeroize", @@ -611,9 +600,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.36.0" +version = "0.37.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a442ece363113bd4bd4c8b18977a7798dd4d3c3383f34fb61936960e8f4ad8" +checksum = "b092fe214090261288111db7a2b2c2118e5a7f30dc2569f1732c4069a6840549" dependencies = [ "cc", "cmake", @@ -623,9 +612,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f92058d22a46adf53ec57a6a96f34447daf02bff52e8fb956c66bcd5c6ac12" +checksum = "ede2ddc593e6c8acc6ce3358c28d6677a6dc49b65ba4b37a2befe14a11297e75" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -648,9 +637,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.94.0" +version = "1.95.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "699da1961a289b23842d88fe2984c6ff68735fdf9bdcbc69ceaeb2491c9bf434" +checksum = "00c5ff27c6ba2cbd95e6e26e2e736676fdf6bcf96495b187733f521cfe4ce448" dependencies = [ "aws-credential-types", "aws-runtime", @@ -672,9 +661,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.96.0" +version = "1.97.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3e3a4cb3b124833eafea9afd1a6cc5f8ddf3efefffc6651ef76a03cbc6b4981" +checksum = "4d186f1e5a3694a188e5a0640b3115ccc6e084d104e16fd6ba968dca072ffef8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -696,9 +685,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.98.0" +version = "1.99.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89c4f19655ab0856375e169865c91264de965bd74c407c7f1e403184b1049409" +checksum = "9acba7c62f3d4e2408fa998a3a8caacd8b9a5b5549cf36e2372fbdae329d5449" dependencies = [ "aws-credential-types", "aws-runtime", @@ -721,9 +710,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" +checksum = "37411f8e0f4bea0c3ca0958ce7f18f6439db24d555dbd809787262cd00926aa9" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -743,9 +732,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.12" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cba48474f1d6807384d06fec085b909f5807e16653c5af5c45dfe89539f0b70" +checksum = "5cc50d0f63e714784b84223abd7abbc8577de8c35d699e0edd19f0a88a08ae13" dependencies = [ "futures-util", "pin-project-lite", @@ -754,9 +743,9 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.63.4" +version = "0.63.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af4a8a5fe3e4ac7ee871237c340bbce13e982d37543b65700f4419e039f5d78e" +checksum = "d619373d490ad70966994801bc126846afaa0d1ee920697a031f0cf63f2568e7" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", @@ -775,9 +764,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.10" +version = "1.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0709f0083aa19b704132684bc26d3c868e06bd428ccc4373b0b55c3e8748a58b" +checksum = "00ccbb08c10f6bcf912f398188e42ee2eab5f1767ce215a02a73bc5df1bbdd95" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -827,9 +816,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.10.1" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd3dfc18c1ce097cf81fced7192731e63809829c6cbf933c1ec47452d08e1aa" +checksum = "22ccf7f6eba8b2dcf8ce9b74806c6c185659c311665c4bf8d6e71ebd454db6bf" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -852,9 +841,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.11.4" +version = "1.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c55e0837e9b8526f49e0b9bfa9ee18ddee70e853f5bc09c5d11ebceddcb0fec" +checksum = "b4af6e5def28be846479bbeac55aa4603d6f7986fc5da4601ba324dd5d377516" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -869,9 +858,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.4.4" +version = "1.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "576b0d6991c9c32bc14fc340582ef148311f924d41815f641a308b5d11e8e7cd" +checksum = "8ca2734c16913a45343b37313605d84e7d8b34a4611598ce1d25b35860a2bed3" dependencies = [ "base64-simd", "bytes", @@ -901,9 +890,9 @@ dependencies = [ [[package]] name = "aws-types" -version = "1.3.12" +version = "1.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c50f3cdf47caa8d01f2be4a6663ea02418e892f9bbfd82c7b9a3a37eaccdd3a" +checksum = "0470cc047657c6e286346bdf10a8719d26efd6a91626992e0e64481e44323e96" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -1003,7 +992,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -1014,7 +1003,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1025,21 +1014,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" - -[[package]] -name = "bitvec" -version = "1.0.1" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" [[package]] name = "blake2" @@ -1086,11 +1063,21 @@ dependencies = [ "piper", ] +[[package]] +name = "bnum" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f781dba93de3a5ef6dc5b17c9958b208f6f3f021623b360fb605ea51ce443f10" +dependencies = [ + "serde", + "serde-big-array", +] + [[package]] name = "bon" -version = "3.8.2" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c" +checksum = "2d13a61f2963b88eef9c1be03df65d42f6996dfeac1054870d950fcf66686f83" dependencies = [ "bon-macros", "rustversion", @@ -1098,9 +1085,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.2" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" +checksum = "d314cc62af2b6b0c65780555abb4d02a03dd3b799cd42419044f0c38d99738c0" dependencies = [ "darling 0.23.0", "ident_case", @@ -1108,30 +1095,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.114", -] - -[[package]] -name = "borsh" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1da5ab77c1437701eeff7c88d968729e7766172279eab0676857b3d63af7a6f" -dependencies = [ - "borsh-derive", - "cfg_aliases", -] - -[[package]] -name = "borsh-derive" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0686c856aa6aac0c4498f936d7d6a02df690f614c03e4d906d1018062b5c5e2c" -dependencies = [ - "once_cell", - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1157,37 +1121,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.1" +version = "3.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" - -[[package]] -name = "bytecheck" -version = "0.6.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" -dependencies = [ - "bytecheck_derive", - "ptr_meta", - "simdutf8", -] - -[[package]] -name = "bytecheck_derive" -version = "0.6.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] +checksum = "5c6f81257d10a0f602a294ae4182251151ff97dbb504ef9afcdda4a64b24d9b4" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] name = "byteorder" @@ -1359,18 +1301,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.54" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394" +checksum = "2797f34da339ce31042b27d23607e051786132987f595b02ba4f6a6dffb7030a" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.5.54" +version = "4.5.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00" +checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876" dependencies = [ "anstyle", "clap_lex", @@ -1378,9 +1320,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e64b0cc0439b12df2fa678eae89a1c56a529fd067a9115f7827f1fffd22b32" +checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" [[package]] name = "cmake" @@ -1470,9 +1412,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpp_demangle" -version = "0.4.5" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2bb79cb74d735044c972aae58ed0aaa9a837e85b01106a54c39e42e97f62253" +checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def" dependencies = [ "cfg-if", ] @@ -1660,7 +1602,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1674,7 +1616,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1687,7 +1629,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1698,7 +1640,7 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ "darling_core 0.20.11", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1709,7 +1651,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core 0.21.3", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1720,7 +1662,7 @@ checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core 0.23.0", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -1960,7 +1902,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "arrow-ipc", "chrono", @@ -2221,7 +2163,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "datafusion-common", "datafusion-doc", @@ -2242,7 +2184,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "datafusion-common", "datafusion-expr-common", @@ -2324,7 +2266,7 @@ checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2352,7 +2294,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "datafusion-common", "datafusion-expr", @@ -2389,7 +2331,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "datafusion-common", "datafusion-expr-common", @@ -2421,7 +2363,7 @@ version = "51.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow", "arrow-ord", "arrow-schema", @@ -2526,9 +2468,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" +checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" dependencies = [ "powerfmt", "serde_core", @@ -2552,7 +2494,7 @@ dependencies = [ "darling 0.20.11", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2562,7 +2504,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2583,7 +2525,7 @@ dependencies = [ "proc-macro2", "quote", "rustc_version", - "syn 2.0.114", + "syn 2.0.116", "unicode-xid", ] @@ -2613,7 +2555,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2666,7 +2608,7 @@ checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -2675,6 +2617,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "erased-serde" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3" +dependencies = [ + "serde", + "serde_core", + "typeid", +] + [[package]] name = "errno" version = "0.3.14" @@ -2722,6 +2675,18 @@ dependencies = [ "once_cell", ] +[[package]] +name = "fastnum" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4089ab2dfd45d8ddc92febb5ca80644389d5ebb954f40231274a3f18341762e2" +dependencies = [ + "bnum", + "num-integer", + "num-traits", + "serde", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -2758,15 +2723,15 @@ version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b375d6465b98090a5f25b1c7703f3859783755aa9a80433b36e0379a3ec2f369" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -2814,17 +2779,11 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -2837,9 +2796,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -2847,15 +2806,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-executor" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "baf29c38818342a3b26b5b923639e7b1f4a61fc5e76102d4b1981c6dc7a7579d" dependencies = [ "futures-core", "futures-task", @@ -2864,9 +2823,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-lite" @@ -2883,26 +2842,26 @@ dependencies = [ [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-timer" @@ -2912,9 +2871,9 @@ checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -2924,7 +2883,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -3039,9 +2997,6 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.8", -] [[package]] name = "hashbrown" @@ -3049,7 +3004,7 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" dependencies = [ - "ahash 0.8.12", + "ahash", "allocator-api2", ] @@ -3237,14 +3192,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64", "bytes", "futures-channel", - "futures-core", "futures-util", "http 1.4.0", "http-body 1.0.1", @@ -3261,9 +3215,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -3286,7 +3240,7 @@ dependencies = [ [[package]] name = "iceberg" version = "0.8.0" -source = "git+https://github.com/apache/iceberg-rust?rev=ee21563#ee21563c2032948f636eae84870f317a0b299a05" +source = "git+https://github.com/mbutrovich/iceberg-rust.git?branch=reader_perf#ccf13657f324224dbaf647f7fbabc1315877b9a0" dependencies = [ "anyhow", "apache-avro", @@ -3308,13 +3262,13 @@ dependencies = [ "chrono", "derive_builder", "expect-test", + "fastnum", "flate2", "fnv", "futures", "itertools 0.13.0", "moka", "murmur3", - "num-bigint", "once_cell", "opendal", "ordered-float 4.6.0", @@ -3323,7 +3277,6 @@ dependencies = [ "reqsign", "reqwest", "roaring", - "rust_decimal", "serde", "serde_bytes", "serde_derive", @@ -3333,6 +3286,7 @@ dependencies = [ "strum", "tokio", "typed-builder", + "typetag", "url", "uuid", "zstd", @@ -3481,7 +3435,7 @@ version = "0.11.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ - "ahash 0.8.12", + "ahash", "indexmap 2.13.0", "is-terminal", "itoa", @@ -3499,6 +3453,15 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +[[package]] +name = "inventory" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" +dependencies = [ + "rustversion", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -3561,9 +3524,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.18" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +checksum = "c867c356cc096b33f4981825ab281ecba3db0acefe60329f044c1789d94c6543" dependencies = [ "jiff-static", "jiff-tzdb-platform", @@ -3576,13 +3539,13 @@ dependencies = [ [[package]] name = "jiff-static" -version = "0.2.18" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +checksum = "f7946b4325269738f270bb55b3c19ab5c5040525f83fd625259422a9d25d9be5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -3762,9 +3725,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libmimalloc-sys" @@ -3886,15 +3849,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.9" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] @@ -3943,9 +3906,9 @@ checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" [[package]] name = "moka" -version = "0.12.12" +version = "0.12.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3dec6bd31b08944e08b58fd99373893a6c17054d6f3ea5006cc894f4f4eee2a" +checksum = "b4ac832c50ced444ef6be0767a008b02c106a909ba79d1d830501e94b96f6b7e" dependencies = [ "async-lock", "crossbeam-channel", @@ -4192,9 +4155,9 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "ordered-float" @@ -4261,11 +4224,11 @@ dependencies = [ [[package]] name = "parquet" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" +checksum = "6ee96b29972a257b855ff2341b37e61af5f12d6af1158b6dcdb5b31ea07bb3cb" dependencies = [ - "ahash 0.8.12", + "ahash", "arrow-array", "arrow-buffer", "arrow-cast", @@ -4302,9 +4265,9 @@ dependencies = [ [[package]] name = "parquet-variant" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c254fac16af78ad96aa442290cb6504951c4d484fdfcfe58f4588033d30e4c8f" +checksum = "a6c31f8f9bfefb9dbf67b0807e00fd918676954a7477c889be971ac904103184" dependencies = [ "arrow-schema", "chrono", @@ -4316,9 +4279,9 @@ dependencies = [ [[package]] name = "parquet-variant-compute" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2178772f1c5ad7e5da8b569d986d3f5cbb4a4cee915925f28fdc700dbb2e80cf" +checksum = "196cd9f7178fed3ac8d5e6d2b51193818e896bbc3640aea3fde3440114a8f39c" dependencies = [ "arrow", "arrow-schema", @@ -4332,9 +4295,9 @@ dependencies = [ [[package]] name = "parquet-variant-json" -version = "57.2.0" +version = "57.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a1510daa121c04848368f9c38d0be425b9418c70be610ecc0aa8071738c0ef3" +checksum = "ed23d7acc90ef60f7fdbcc473fa2fdaefa33542ed15b84388959346d52c839be" dependencies = [ "arrow-schema", "base64", @@ -4409,7 +4372,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -4485,15 +4448,15 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" dependencies = [ "portable-atomic", ] @@ -4551,7 +4514,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -4565,9 +4528,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -4578,7 +4541,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "chrono", "flate2", "procfs-core", @@ -4591,7 +4554,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "chrono", "hex", ] @@ -4621,7 +4584,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.114", + "syn 2.0.116", "tempfile", ] @@ -4635,7 +4598,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -4647,26 +4610,6 @@ dependencies = [ "prost", ] -[[package]] -name = "ptr_meta" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" -dependencies = [ - "ptr_meta_derive", -] - -[[package]] -name = "ptr_meta_derive" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "quad-rand" version = "0.2.3" @@ -4759,9 +4702,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -4772,12 +4715,6 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - [[package]] name = "rand" version = "0.8.5" @@ -4880,7 +4817,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", ] [[package]] @@ -4900,7 +4837,7 @@ checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -4917,9 +4854,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -4928,15 +4865,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "relative-path" @@ -4944,15 +4881,6 @@ version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" -[[package]] -name = "rend" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" -dependencies = [ - "bytecheck", -] - [[package]] name = "reqsign" version = "0.16.5" @@ -5048,35 +4976,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rkyv" -version = "0.7.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2297bf9c81a3f0dc96bc9521370b88f054168c29826a75e89c55ff196e7ed6a1" -dependencies = [ - "bitvec", - "bytecheck", - "bytes", - "hashbrown 0.12.3", - "ptr_meta", - "rend", - "rkyv_derive", - "seahash", - "tinyvec", - "uuid", -] - -[[package]] -name = "rkyv_derive" -version = "0.7.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84d7b42d4b8d06048d3ac8db0eb31bcb942cbeb709f0b5f2b2ebde398d3038f5" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "roaring" version = "0.11.3" @@ -5112,7 +5011,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.114", + "syn 2.0.116", "unicode-ident", ] @@ -5126,22 +5025,6 @@ dependencies = [ "ordered-multimap", ] -[[package]] -name = "rust_decimal" -version = "1.40.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61f703d19852dbf87cbc513643fa81428361eb6940f1ac14fd58155d295a3eb0" -dependencies = [ - "arrayvec", - "borsh", - "bytes", - "num-traits", - "rand 0.8.5", - "rkyv", - "serde", - "serde_json", -] - [[package]] name = "rustc-demangle" version = "0.1.27" @@ -5175,7 +5058,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys 0.4.15", @@ -5188,7 +5071,7 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "errno", "libc", "linux-raw-sys 0.11.0", @@ -5261,9 +5144,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "same-file" @@ -5297,9 +5180,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54e910108742c57a770f492731f99be216a52fadd361b06c8fb59d74ccc267d2" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", @@ -5313,19 +5196,13 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "seahash" -version = "4.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" - [[package]] name = "security-framework" -version = "3.5.1" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" +checksum = "d17b898a6d6948c3a8ee4372c17cb384f90d2e6e912ef00895b14fd7ab54ec38" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "core-foundation", "core-foundation-sys", "libc", @@ -5334,9 +5211,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.15.0" +version = "2.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" +checksum = "321c8673b092a9a42605034a9879d73cb79101ed5fd117bc9a597b89b4e9e61a" dependencies = [ "core-foundation-sys", "libc", @@ -5364,6 +5241,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-big-array" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11fc7cc2c76d73e0f27ee52abbd64eec84d46f370c88371120433196934e4b7f" +dependencies = [ + "serde", +] + [[package]] name = "serde-value" version = "0.7.0" @@ -5401,7 +5287,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5425,7 +5311,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5452,7 +5338,7 @@ dependencies = [ "indexmap 1.9.3", "indexmap 2.13.0", "schemars 0.9.0", - "schemars 1.2.0", + "schemars 1.2.1", "serde_core", "serde_json", "serde_with_macros", @@ -5468,7 +5354,7 @@ dependencies = [ "darling 0.21.3", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5536,15 +5422,15 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -5560,9 +5446,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" dependencies = [ "libc", "windows-sys 0.60.2", @@ -5595,7 +5481,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5634,7 +5520,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5645,9 +5531,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "symbolic-common" -version = "12.17.1" +version = "12.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520cf51c674f8b93d533f80832babe413214bb766b6d7cb74ee99ad2971f8467" +checksum = "751a2823d606b5d0a7616499e4130a516ebd01a44f39811be2b9600936509c23" dependencies = [ "debugid", "memmap2", @@ -5657,9 +5543,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.17.1" +version = "12.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f0de2ee0ffa2641e17ba715ad51d48b9259778176517979cb38b6aa86fa7425" +checksum = "79b237cfbe320601dd24b4ac817a5b68bb28f5508e33f08d42be0682cadc8ac9" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -5679,9 +5565,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.114" +version = "2.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" +checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" dependencies = [ "proc-macro2", "quote", @@ -5705,7 +5591,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5714,12 +5600,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "tempfile" version = "3.25.0" @@ -5759,7 +5639,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5770,7 +5650,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5925,7 +5805,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -5974,9 +5854,9 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.6+spec-1.1.0" +version = "1.0.9+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" dependencies = [ "winnow", ] @@ -6002,7 +5882,7 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "bytes", "futures-util", "http 1.4.0", @@ -6045,7 +5925,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -6089,9 +5969,15 @@ checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] +[[package]] +name = "typeid" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" + [[package]] name = "typemap-ors" version = "1.0.0" @@ -6107,11 +5993,35 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +[[package]] +name = "typetag" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be2212c8a9b9bcfca32024de14998494cf9a5dfa59ea1b829de98bac374b86bf" +dependencies = [ + "erased-serde", + "inventory", + "once_cell", + "serde", + "typetag-impl", +] + +[[package]] +name = "typetag-impl" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27a7a9b72ba121f6f1f6c3632b85604cac41aedb5ddc70accbebb6cac83de846" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.116", +] + [[package]] name = "unicode-ident" -version = "1.0.22" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" @@ -6178,11 +6088,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.20.0" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" +checksum = "b672338555252d43fd2240c714dc444b8c6fb0a5c5335e65a07bba7742735ddb" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.1", "js-sys", "serde_core", "wasm-bindgen", @@ -6295,7 +6205,7 @@ dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "wasm-bindgen-shared", ] @@ -6349,7 +6259,7 @@ version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.0", "hashbrown 0.15.5", "indexmap 2.13.0", "semver", @@ -6377,9 +6287,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12bed680863276c63889429bfd6cab3b99943659923822de1c8a39c49e4d722c" +checksum = "22cfaf3c063993ff62e73cb4311efde4db1efb31ab78a3e5c457939ad5cc0bed" dependencies = [ "rustls-pki-types", ] @@ -6448,7 +6358,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -6459,7 +6369,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -6756,7 +6666,7 @@ dependencies = [ "heck", "indexmap 2.13.0", "prettyplease", - "syn 2.0.114", + "syn 2.0.116", "wasm-metadata", "wit-bindgen-core", "wit-component", @@ -6772,7 +6682,7 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "wit-bindgen-core", "wit-bindgen-rust", ] @@ -6784,7 +6694,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags 2.10.0", + "bitflags 2.11.0", "indexmap 2.13.0", "log", "serde", @@ -6820,15 +6730,6 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" -[[package]] -name = "wyz" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" -dependencies = [ - "tap", -] - [[package]] name = "xmlparser" version = "0.13.6" @@ -6863,28 +6764,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.33" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.33" +version = "0.8.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] @@ -6904,7 +6805,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", "synstructure", ] @@ -6944,20 +6845,20 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.114", + "syn 2.0.116", ] [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "c745c48e1007337ed136dc99df34128b9faa6ed542d80a1c673cf55a6d7236c8" [[package]] name = "zmij" -version = "1.0.16" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfcd145825aace48cff44a8844de64bf75feec3080e0aa5cdbde72961ae51a65" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" [[package]] name = "zstd" diff --git a/native/Cargo.toml b/native/Cargo.toml index 3aa3cd0abf..9e7ec1a2d8 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -54,7 +54,7 @@ object_store = { version = "0.12.3", features = ["gcp", "azure", "aws", "http"] url = "2.2" aws-config = "1.8.14" aws-credential-types = "1.2.12" -iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "ee21563" } +iceberg = { git = "https://github.com/mbutrovich/iceberg-rust.git", branch = "reader_perf" } [profile.release] debug = true diff --git a/native/core/src/execution/operators/iceberg_scan.rs b/native/core/src/execution/operators/iceberg_scan.rs index bc20592e90..7621bd8c55 100644 --- a/native/core/src/execution/operators/iceberg_scan.rs +++ b/native/core/src/execution/operators/iceberg_scan.rs @@ -51,16 +51,16 @@ use iceberg::scan::FileScanTask; /// Executes pre-planned FileScanTasks for efficient parallel scanning. #[derive(Debug)] pub struct IcebergScanExec { - /// Iceberg table metadata location for FileIO initialization - metadata_location: String, /// Output schema after projection output_schema: SchemaRef, /// Cached execution plan properties plan_properties: PlanProperties, - /// Catalog-specific configuration for FileIO - catalog_properties: HashMap, + /// Reused across execute() calls to preserve cached OpenDAL operators + file_io: FileIO, /// Pre-planned file scan tasks tasks: Vec, + /// Number of data files to read concurrently + data_file_concurrency_limit: usize, /// Metrics metrics: ExecutionPlanMetricsSet, } @@ -71,18 +71,20 @@ impl IcebergScanExec { schema: SchemaRef, catalog_properties: HashMap, tasks: Vec, + data_file_concurrency_limit: usize, ) -> Result { let output_schema = schema; let plan_properties = Self::compute_properties(Arc::clone(&output_schema), 1); + let file_io = Self::load_file_io(&catalog_properties, &metadata_location)?; let metrics = ExecutionPlanMetricsSet::new(); Ok(Self { - metadata_location, output_schema, plan_properties, - catalog_properties, + file_io, tasks, + data_file_concurrency_limit, metrics, }) } @@ -147,7 +149,7 @@ impl IcebergScanExec { context: Arc, ) -> DFResult { let output_schema = Arc::clone(&self.output_schema); - let file_io = Self::load_file_io(&self.catalog_properties, &self.metadata_location)?; + let file_io = self.file_io.clone(); let batch_size = context.session_config().batch_size(); let metrics = IcebergScanMetrics::new(&self.metrics); @@ -158,7 +160,7 @@ impl IcebergScanExec { let reader = iceberg::arrow::ArrowReaderBuilder::new(file_io) .with_batch_size(batch_size) - .with_data_file_concurrency_limit(context.session_config().target_partitions()) + .with_data_file_concurrency_limit(self.data_file_concurrency_limit) .with_row_selection_enabled(true) .build(); @@ -301,11 +303,6 @@ where impl DisplayAs for IcebergScanExec { fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "IcebergScanExec: metadata_location={}, num_tasks={}", - self.metadata_location, - self.tasks.len() - ) + write!(f, "IcebergScanExec: num_tasks={}", self.tasks.len()) } } diff --git a/native/core/src/execution/planner.rs b/native/core/src/execution/planner.rs index f84d6cc590..3b3d6ea7de 100644 --- a/native/core/src/execution/planner.rs +++ b/native/core/src/execution/planner.rs @@ -1176,12 +1176,14 @@ impl PhysicalPlanner { .collect(); let metadata_location = common.metadata_location.clone(); let tasks = parse_file_scan_tasks_from_common(common, &scan.file_scan_tasks)?; + let data_file_concurrency_limit = common.data_file_concurrency_limit as usize; let iceberg_scan = IcebergScanExec::new( metadata_location, required_schema, catalog_properties, tasks, + data_file_concurrency_limit, )?; Ok(( @@ -2941,6 +2943,7 @@ fn parse_file_scan_tasks_from_common( Ok(iceberg::scan::FileScanTask { data_file_path: proto_task.data_file_path.clone(), + file_size_in_bytes: proto_task.file_size_in_bytes, start: proto_task.start, length: proto_task.length, record_count: proto_task.record_count, diff --git a/native/proto/src/proto/operator.proto b/native/proto/src/proto/operator.proto index 93872b462c..2bfefa17d7 100644 --- a/native/proto/src/proto/operator.proto +++ b/native/proto/src/proto/operator.proto @@ -175,6 +175,9 @@ message IcebergScanCommon { repeated PartitionData partition_data_pool = 9; repeated DeleteFileList delete_files_pool = 10; repeated spark.spark_expression.Expr residual_pool = 11; + + // Number of data files to read concurrently within a single task + uint32 data_file_concurrency_limit = 12; } message IcebergScan { @@ -207,6 +210,9 @@ message IcebergFileScanTask { // Record count if reading entire file optional uint64 record_count = 4; + // Total file size from the manifest entry, used to skip stat/HEAD calls + uint64 file_size_in_bytes = 5; + // Indices into IcebergScan deduplication pools uint32 schema_idx = 15; optional uint32 partition_type_idx = 16; diff --git a/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala b/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala index c86b2a51bb..1798f5e366 100644 --- a/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala +++ b/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.comet.{CometBatchScanExec, CometNativeExec} import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceRDD, DataSourceRDDPartition} import org.apache.spark.sql.types._ -import org.apache.comet.ConfigEntry +import org.apache.comet.{CometConf, ConfigEntry} import org.apache.comet.iceberg.{CometIcebergNativeScanMetadata, IcebergReflection} import org.apache.comet.serde.{CometOperatorSerde, OperatorOuterClass} import org.apache.comet.serde.ExprOuterClass.Expr @@ -757,6 +757,8 @@ object CometIcebergNativeScan extends CometOperatorSerde[CometBatchScanExec] wit var totalTasks = 0 commonBuilder.setMetadataLocation(metadata.metadataLocation) + commonBuilder.setDataFileConcurrencyLimit( + CometConf.COMET_ICEBERG_DATA_FILE_CONCURRENCY_LIMIT.get()) metadata.catalogProperties.foreach { case (key, value) => commonBuilder.putCatalogProperties(key, value) } @@ -784,6 +786,7 @@ object CometIcebergNativeScan extends CometOperatorSerde[CometBatchScanExec] wit val startMethod = contentScanTaskClass.getMethod("start") val lengthMethod = contentScanTaskClass.getMethod("length") val residualMethod = contentScanTaskClass.getMethod("residual") + val fileSizeInBytesMethod = contentFileClass.getMethod("fileSizeInBytes") val taskSchemaMethod = fileScanTaskClass.getMethod("schema") val toJsonMethod = schemaParserClass.getMethod("toJson", schemaClass) toJsonMethod.setAccessible(true) @@ -838,6 +841,10 @@ object CometIcebergNativeScan extends CometOperatorSerde[CometBatchScanExec] wit val length = lengthMethod.invoke(task).asInstanceOf[Long] taskBuilder.setLength(length) + val fileSizeInBytes = + fileSizeInBytesMethod.invoke(dataFile).asInstanceOf[Long] + taskBuilder.setFileSizeInBytes(fileSizeInBytes) + val taskSchema = taskSchemaMethod.invoke(task) val deletes =