From eaa3f79b998fc433e930fc74ba648372d59e6ace Mon Sep 17 00:00:00 2001 From: Mimoune Date: Sat, 31 Jan 2026 01:32:45 +1000 Subject: [PATCH 01/87] Add use_fabric_endpoint parameter to MicrosoftAzure class (#1357) This change adds support for the use_fabric_endpoint parameter to the MicrosoftAzure object store class, enabling connections to Microsoft Fabric OneLake storage. The parameter allows users to specify that they want to use Data Lake Storage Gen2 endpoints (dfs.fabric.microsoft.com) instead of the default Azure Blob Storage endpoints (blob.core.windows.net), which is required for OneLake/Fabric storage access. Implementation follows the same pattern as existing boolean parameters (use_emulator, allow_http) by: - Adding the parameter to the PyO3 signature macro - Adding it as Option to the function parameters - Conditionally calling with_use_fabric_endpoint() on the builder Fixes #1356 Co-authored-by: Claude Sonnet 4.5 --- src/store.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/store.rs b/src/store.rs index dcbcbd325..3eae866bc 100644 --- a/src/store.rs +++ b/src/store.rs @@ -76,7 +76,7 @@ pub struct PyMicrosoftAzureContext { #[pymethods] impl PyMicrosoftAzureContext { #[allow(clippy::too_many_arguments)] - #[pyo3(signature = (container_name, account=None, access_key=None, bearer_token=None, client_id=None, client_secret=None, tenant_id=None, sas_query_pairs=None, use_emulator=None, allow_http=None))] + #[pyo3(signature = (container_name, account=None, access_key=None, bearer_token=None, client_id=None, client_secret=None, tenant_id=None, sas_query_pairs=None, use_emulator=None, allow_http=None, use_fabric_endpoint=None))] #[new] fn new( container_name: String, @@ -89,6 +89,7 @@ impl PyMicrosoftAzureContext { sas_query_pairs: Option>, use_emulator: Option, allow_http: Option, + use_fabric_endpoint: Option, ) -> Self { let mut builder = MicrosoftAzureBuilder::from_env().with_container_name(&container_name); @@ -127,6 +128,10 @@ impl PyMicrosoftAzureContext { builder = builder.with_allow_http(allow_http); } + if let Some(use_fabric_endpoint) = use_fabric_endpoint { + builder = builder.with_use_fabric_endpoint(use_fabric_endpoint); + } + Self { inner: Arc::new( builder From e7f5867f506e4d14b4993606782580d19af2a347 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Mon, 2 Feb 2026 09:27:54 -0500 Subject: [PATCH 02/87] Prepare for DF52 release (#1337) * Prepare for DF52 release * Update datafusion to point to release version * round now respects decimal * Update pathlib * Update documentation * Commit copilot suggestions * Pass codec capsule to table providers * Simplify codec passing around * Update signatures for FFI passing of logical codec around * Update upgrade guide for new method signature * Add more unit tests to cover FFI providers * Missing run command --- Cargo.lock | 373 ++--- Cargo.toml | 8 +- README.md | 2 +- benchmarks/tpch/tpch.py | 6 +- docs/source/contributor-guide/ffi.rst | 2 + docs/source/index.rst | 1 + docs/source/user-guide/upgrade-guides.rst | 96 ++ examples/datafusion-ffi-example/Cargo.lock | 1453 +++++------------ examples/datafusion-ffi-example/Cargo.toml | 15 +- .../python/tests/_test_catalog_provider.py | 116 +- .../python/tests/_test_schema_provider.py | 232 +++ .../python/tests/_test_table_function.py | 3 +- .../python/tests/_test_table_provider.py | 7 +- .../src/aggregate_udf.rs | 21 +- .../src/catalog_provider.rs | 89 +- examples/datafusion-ffi-example/src/lib.rs | 7 +- .../datafusion-ffi-example/src/scalar_udf.rs | 11 +- .../src/table_function.rs | 19 +- .../src/table_provider.rs | 19 +- examples/datafusion-ffi-example/src/utils.rs | 41 + .../datafusion-ffi-example/src/window_udf.rs | 19 +- examples/tpch/_tests.py | 4 +- python/datafusion/catalog.py | 22 +- python/datafusion/context.py | 20 +- python/datafusion/user_defined.py | 7 +- python/tests/test_dataframe.py | 5 +- python/tests/test_udtf.py | 119 ++ src/catalog.rs | 176 +- src/context.rs | 153 +- src/dataframe.rs | 12 +- src/expr.rs | 4 +- src/expr/scalar_variable.rs | 12 +- src/expr/statement.rs | 48 +- src/sql/logical.rs | 6 +- src/table.rs | 16 +- src/udaf.rs | 8 +- src/udf.rs | 8 +- src/udtf.rs | 38 +- src/udwf.rs | 29 +- src/utils.rs | 73 +- 40 files changed, 1690 insertions(+), 1610 deletions(-) create mode 100644 docs/source/user-guide/upgrade-guides.rst create mode 100644 examples/datafusion-ffi-example/python/tests/_test_schema_provider.py create mode 100644 examples/datafusion-ffi-example/src/utils.rs create mode 100644 python/tests/test_udtf.py diff --git a/Cargo.lock b/Cargo.lock index ac691ca9d..1fb17610e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,15 +117,16 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "apache-avro" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7" +checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf" dependencies = [ "bigdecimal", "bon", - "bzip2 0.6.1", + "bzip2", "crc32fast", "digest", + "liblzma", "log", "miniz_oxide", "num-bigint", @@ -140,7 +141,6 @@ dependencies = [ "strum_macros", "thiserror", "uuid", - "xz2", "zstd", ] @@ -425,19 +425,15 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.19" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "98ec5f6c2f8bc326c994cb9e241cc257ddaba9afa8555a43cffbb5dd86efaa37" dependencies = [ - "bzip2 0.5.2", - "flate2", + "compression-codecs", + "compression-core", "futures-core", - "memchr", "pin-project-lite", "tokio", - "xz2", - "zstd", - "zstd-safe", ] [[package]] @@ -613,15 +609,6 @@ version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" -[[package]] -name = "bzip2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" -dependencies = [ - "bzip2-sys", -] - [[package]] name = "bzip2" version = "0.6.1" @@ -631,16 +618,6 @@ dependencies = [ "libbz2-rs-sys", ] -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - [[package]] name = "cc" version = "1.2.51" @@ -706,6 +683,27 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "compression-codecs" +version = "0.4.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0f7ac3e5b97fdce45e8922fb05cae2c37f7bbd63d30dd94821dacfd8f3f2bf2" +dependencies = [ + "bzip2", + "compression-core", + "flate2", + "liblzma", + "memchr", + "zstd", + "zstd-safe", +] + +[[package]] +name = "compression-core" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" + [[package]] name = "const-random" version = "0.1.18" @@ -903,15 +901,15 @@ dependencies = [ [[package]] name = "datafusion" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" +checksum = "f02e9a7e70f214e5282db11c8effba173f4e25a00977e520c6b811817e3a082b" dependencies = [ "arrow", "arrow-schema", "async-trait", "bytes", - "bzip2 0.6.1", + "bzip2", "chrono", "datafusion-catalog", "datafusion-catalog-listing", @@ -942,27 +940,26 @@ dependencies = [ "flate2", "futures", "itertools", + "liblzma", "log", "object_store", "parking_lot", "parquet", "rand", "regex", - "rstest", "sqlparser", "tempfile", "tokio", "url", "uuid", - "xz2", "zstd", ] [[package]] name = "datafusion-catalog" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" +checksum = "f3e91b2603f906cf8cb8be84ba4e34f9d8fe6dbdfdd6916d55f22317074d1fdf" dependencies = [ "arrow", "async-trait", @@ -985,9 +982,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" +checksum = "919d20cdebddee4d8dca651aa0291a44c8104824d1ac288996a325c319ce31ba" dependencies = [ "arrow", "async-trait", @@ -1004,14 +1001,13 @@ dependencies = [ "itertools", "log", "object_store", - "tokio", ] [[package]] name = "datafusion-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" +checksum = "31ff2c4e95be40ad954de93862167b165a6fb49248bb882dea8aef4f888bc767" dependencies = [ "ahash", "apache-avro", @@ -1019,7 +1015,7 @@ dependencies = [ "arrow-ipc", "chrono", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap", "libc", "log", @@ -1034,9 +1030,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" +checksum = "0dd9f820fe58c2600b6c33a14432228dbaaf233b96c83a1fd61f16d073d5c3c5" dependencies = [ "futures", "log", @@ -1045,15 +1041,15 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" +checksum = "86b32b7b12645805d20b70aba6ba846cd262d7b073f7f617640c3294af108d44" dependencies = [ "arrow", "async-compression", "async-trait", "bytes", - "bzip2 0.6.1", + "bzip2", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -1068,21 +1064,21 @@ dependencies = [ "futures", "glob", "itertools", + "liblzma", "log", "object_store", "rand", "tokio", "tokio-util", "url", - "xz2", "zstd", ] [[package]] name = "datafusion-datasource-arrow" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +checksum = "597695c8ebb723ee927b286139d43a3fbed6de7ad9210bd1a9fed5c721ac6fb1" dependencies = [ "arrow", "arrow-ipc", @@ -1104,9 +1100,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388ed8be535f562cc655b9c3d22edbfb0f1a50a25c242647a98b6d92a75b55a1" +checksum = "1c94347a431bac4bcb8408abb1cb5d40dab41f8d59c39db32f8f59e08144875f" dependencies = [ "apache-avro", "arrow", @@ -1124,9 +1120,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" +checksum = "6bb493d07d8da6d00a89ea9cc3e74a56795076d9faed5ac30284bd9ef37929e9" dependencies = [ "arrow", "async-trait", @@ -1147,9 +1143,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" +checksum = "5e9806521c4d3632f53b9a664041813c267c670232efa1452ef29faee71c3749" dependencies = [ "arrow", "async-trait", @@ -1169,9 +1165,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" +checksum = "f6a3ccd48d5034f8461f522114d0e46dfb3a9f0ce01a4d53a721024ace95d60d" dependencies = [ "arrow", "async-trait", @@ -1199,18 +1195,19 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" +checksum = "ff69a18418e9878d4840f35e2ad7f2a6386beedf192e9f065e628a7295ff5fbf" [[package]] name = "datafusion-execution" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" +checksum = "ccbc5e469b35d87c0b115327be83d68356ef9154684d32566315b5c071577e23" dependencies = [ "arrow", "async-trait", + "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -1225,9 +1222,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" +checksum = "81ed3c02a3faf4e09356d5a314471703f440f0a6a14ca6addaf6cfb44ab14de5" dependencies = [ "arrow", "async-trait", @@ -1248,9 +1245,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" +checksum = "1567e60d21c372ca766dc9dde98efabe2b06d98f008d988fed00d93546bf5be7" dependencies = [ "arrow", "datafusion-common", @@ -1261,20 +1258,27 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec510e7787641279b0336e8b79e4b7bd1385d5976875ff9b97f4269ce5231a67" +checksum = "f3ec8492849520101bee44489da8c49f2df3c34d55fd6e502f8083e66a30cdff" dependencies = [ "abi_stable", "arrow", "arrow-schema", "async-ffi", "async-trait", - "datafusion", + "datafusion-catalog", "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto", "datafusion-proto-common", + "datafusion-session", "futures", "log", "prost", @@ -1284,9 +1288,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" +checksum = "c4593538abd95c27eeeb2f86b7ad827cce07d0c474eae9b122f4f9675f8c20ad" dependencies = [ "arrow", "arrow-buffer", @@ -1294,6 +1298,7 @@ dependencies = [ "blake2", "blake3", "chrono", + "chrono-tz", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1314,9 +1319,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" +checksum = "f81cdf609f43cd26156934fd81beb7215d60dda40a776c2e1b83d73df69434f2" dependencies = [ "ahash", "arrow", @@ -1335,9 +1340,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" +checksum = "9173f1bcea2ede4a5c23630a48469f06c9db9a408eb5fd140d1ff9a5e0c40ebf" dependencies = [ "ahash", "arrow", @@ -1348,9 +1353,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" +checksum = "1d0b9f32e7735a3b94ae8b9596d89080dc63dd139029a91133be370da099490d" dependencies = [ "arrow", "arrow-ord", @@ -1371,9 +1376,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" +checksum = "57a29e8a6201b3b9fb2be17d88e287c6d427948d64220cd5ea72ced614a1aee5" dependencies = [ "arrow", "async-trait", @@ -1387,9 +1392,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" +checksum = "cd412754964a31c515e5a814e5ce0edaf30f0ea975f3691e800eff115ee76dfb" dependencies = [ "arrow", "datafusion-common", @@ -1405,9 +1410,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" +checksum = "d49be73a5ac0797398927a543118bd68e58e80bf95ebdabc77336bcd9c38a711" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1415,9 +1420,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" +checksum = "439ff5489dcac4d34ed7a49a93310c3345018c4469e34726fa471cdda725346d" dependencies = [ "datafusion-doc", "quote", @@ -1426,9 +1431,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" +checksum = "a80bb7de8ff5a9948799bc7749c292eac5c629385cdb582893ef2d80b6e718c4" dependencies = [ "arrow", "chrono", @@ -1446,9 +1451,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" +checksum = "83480008f66691a0047c5a88990bd76b7c1117dd8a49ca79959e214948b81f0a" dependencies = [ "ahash", "arrow", @@ -1458,19 +1463,21 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap", "itertools", "parking_lot", "paste", "petgraph 0.8.3", + "recursive", + "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" +checksum = "6b438306446646b359666a658cc29d5494b1e9873bc7a57707689760666fc82c" dependencies = [ "arrow", "datafusion-common", @@ -1483,23 +1490,26 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" +checksum = "95b1fbf739038e0b313473588331c5bf79985d1b842b9937c1f10b170665cae1" dependencies = [ "ahash", "arrow", + "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", + "hashbrown 0.16.1", + "indexmap", "itertools", + "parking_lot", ] [[package]] name = "datafusion-physical-optimizer" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" +checksum = "fc4cd3a170faa0f1de04bd4365ccfe309056746dd802ed276e8787ccb8e8a0d4" dependencies = [ "arrow", "datafusion-common", @@ -1516,27 +1526,27 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" +checksum = "a616a72b4ddf550652b36d5a7c0386eac4accea3ffc6c29a7b16c45f237e9882" dependencies = [ "ahash", "arrow", "arrow-ord", "arrow-schema", "async-trait", - "chrono", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap", "itertools", "log", @@ -1547,9 +1557,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" +checksum = "a0a96a57f60a53041c6d275dc46313bbe34f07ed51c4cea3557b321d4f3da7e0" dependencies = [ "arrow", "chrono", @@ -1574,9 +1584,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" +checksum = "38f4c54f4a47608d2ef5af2fbaca9975cac9e48b93acf41b2ccf584405a3da5e" dependencies = [ "arrow", "datafusion-common", @@ -1585,9 +1595,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" +checksum = "4bf4b50be3ab65650452993eda4baf81edb245fb039b8714476b0f4c8801a527" dependencies = [ "arrow", "datafusion-common", @@ -1630,9 +1640,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" +checksum = "66e080e2c105284460580c18e751b2133cc306df298181e4349b5b134632811a" dependencies = [ "async-trait", "datafusion-common", @@ -1644,9 +1654,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" +checksum = "3dac502db772ff9bffc2ceae321963091982e8d5f5dfcb877e8dc66fc9a093cc" dependencies = [ "arrow", "bigdecimal", @@ -1662,9 +1672,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2505af06d103a55b4e8ded0c6aeb6c72a771948da939c0bd3f8eee67af475a9c" +checksum = "bd76fd28a2804bf3d757ee19bfba8f818b3361157f0cdfa314609ad64f249b75" dependencies = [ "async-recursion", "async-trait", @@ -1868,12 +1878,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" -[[package]] -name = "futures-timer" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" - [[package]] name = "futures-util" version = "0.3.31" @@ -1980,10 +1984,6 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "hashbrown" @@ -2412,6 +2412,26 @@ dependencies = [ "winapi", ] +[[package]] +name = "liblzma" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73c36d08cad03a3fbe2c4e7bb3a9e84c57e4ee4135ed0b065cade3d98480c648" +dependencies = [ + "liblzma-sys", +] + +[[package]] +name = "liblzma-sys" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "libm" version = "0.2.15" @@ -2479,17 +2499,6 @@ dependencies = [ "twox-hash", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "md-5" version = "0.10.6" @@ -2859,15 +2868,6 @@ dependencies = [ "syn 2.0.113", ] -[[package]] -name = "proc-macro-crate" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" -dependencies = [ - "toml_edit", -] - [[package]] name = "proc-macro2" version = "1.0.104" @@ -3222,12 +3222,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "relative-path" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" - [[package]] name = "repr_offset" version = "0.2.2" @@ -3293,35 +3287,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rstest" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" -dependencies = [ - "futures-timer", - "futures-util", - "rstest_macros", -] - -[[package]] -name = "rstest_macros" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" -dependencies = [ - "cfg-if", - "glob", - "proc-macro-crate", - "proc-macro2", - "quote", - "regex", - "relative-path", - "rustc_version", - "syn 2.0.113", - "unicode-ident", -] - [[package]] name = "rustc-hash" version = "2.1.1" @@ -3941,36 +3906,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml_datetime" -version = "0.7.5+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_edit" -version = "0.23.10+spec-1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269" -dependencies = [ - "indexmap", - "toml_datetime", - "toml_parser", - "winnow", -] - -[[package]] -name = "toml_parser" -version = "1.0.6+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" -dependencies = [ - "winnow", -] - [[package]] name = "tower" version = "0.5.2" @@ -4591,15 +4526,6 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" -[[package]] -name = "winnow" -version = "0.7.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" -dependencies = [ - "memchr", -] - [[package]] name = "wit-bindgen" version = "0.46.0" @@ -4612,15 +4538,6 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - [[package]] name = "yoke" version = "0.8.1" diff --git a/Cargo.toml b/Cargo.toml index 364713964..af2ffb012 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,10 +57,10 @@ pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"] } pyo3-log = "0.13.2" arrow = { version = "57", features = ["pyarrow"] } arrow-select = { version = "57" } -datafusion = { version = "51", features = ["avro", "unicode_expressions"] } -datafusion-substrait = { version = "51", optional = true } -datafusion-proto = { version = "51" } -datafusion-ffi = { version = "51" } +datafusion = { version = "52", features = ["avro", "unicode_expressions"] } +datafusion-substrait = { version = "52", optional = true } +datafusion-proto = { version = "52" } +datafusion-ffi = { version = "52" } prost = "0.14.1" # keep in line with `datafusion-substrait` uuid = { version = "1.18", features = ["v4"] } mimalloc = { version = "0.1", optional = true, default-features = false, features = [ diff --git a/README.md b/README.md index 0cdf17ab8..810ac8710 100644 --- a/README.md +++ b/README.md @@ -275,7 +275,7 @@ needing to activate the virtual environment: ```bash uv run --no-project maturin develop --uv -uv --no-project pytest . +uv run --no-project pytest . ``` ### Running & Installing pre-commit hooks diff --git a/benchmarks/tpch/tpch.py b/benchmarks/tpch/tpch.py index 9cc897e76..ffee5554c 100644 --- a/benchmarks/tpch/tpch.py +++ b/benchmarks/tpch/tpch.py @@ -23,7 +23,7 @@ def bench(data_path, query_path) -> None: - with Path.open("results.csv", "w") as results: + with Path("results.csv").open("w") as results: # register tables start = time.time() total_time_millis = 0 @@ -46,7 +46,7 @@ def bench(data_path, query_path) -> None: print("Configuration:\n", ctx) # register tables - with Path.open("create_tables.sql") as f: + with Path("create_tables.sql").open() as f: sql = "" for line in f.readlines(): if line.startswith("--"): @@ -66,7 +66,7 @@ def bench(data_path, query_path) -> None: # run queries for query in range(1, 23): - with Path.open(f"{query_path}/q{query}.sql") as f: + with Path(f"{query_path}/q{query}.sql").open() as f: text = f.read() tmp = text.split(";") queries = [s.strip() for s in tmp if len(s.strip()) > 0] diff --git a/docs/source/contributor-guide/ffi.rst b/docs/source/contributor-guide/ffi.rst index 64413866f..5006b0ca4 100644 --- a/docs/source/contributor-guide/ffi.rst +++ b/docs/source/contributor-guide/ffi.rst @@ -15,6 +15,8 @@ .. specific language governing permissions and limitations .. under the License. +.. _ffi: + Python Extensions ================= diff --git a/docs/source/index.rst b/docs/source/index.rst index adec60f48..134d41cb6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -77,6 +77,7 @@ Example user-guide/io/index user-guide/configuration user-guide/sql + user-guide/upgrade-guides .. _toc.contributor_guide: diff --git a/docs/source/user-guide/upgrade-guides.rst b/docs/source/user-guide/upgrade-guides.rst new file mode 100644 index 000000000..a77f60776 --- /dev/null +++ b/docs/source/user-guide/upgrade-guides.rst @@ -0,0 +1,96 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Upgrade Guides +============== + +DataFusion 52.0.0 +----------------- + +This version includes a major update to the :ref:`ffi` due to upgrades +to the `Foreign Function Interface `_. +Users who contribute their own ``CatalogProvider``, ``SchemaProvider``, +``TableProvider`` or ``TableFunction`` via FFI must now provide access to a +``LogicalExtensionCodec`` and a ``TaskContextProvider``. The function signatures +for the methods to get these ``PyCapsule`` objects now requires an additional +parameter, which is a Python object that can be used to extract the +``FFI_LogicalExtensionCodec`` that is necessary. + +A complete example can be found in the `FFI example `_. +Your methods need to be updated to take an additional parameter like in this +example. + +.. code-block:: rust + + #[pymethods] + impl MyCatalogProvider { + pub fn __datafusion_catalog_provider__<'py>( + &self, + py: Python<'py>, + session: Bound, + ) -> PyResult> { + let name = cr"datafusion_catalog_provider".into(); + + let provider = Arc::clone(&self.inner) as Arc; + + let codec = ffi_logical_codec_from_pycapsule(session)?; + let provider = FFI_CatalogProvider::new_with_ffi_codec(provider, None, codec); + + PyCapsule::new(py, provider, Some(name)) + } + } + +To extract the logical extension codec FFI object from the provided object you +can implement a helper method such as: + +.. code-block:: rust + + pub(crate) fn ffi_logical_codec_from_pycapsule( + obj: Bound, + ) -> PyResult { + let attr_name = "__datafusion_logical_extension_codec__"; + let capsule = if obj.hasattr(attr_name)? { + obj.getattr(attr_name)?.call0()? + } else { + obj + }; + + let capsule = capsule.downcast::()?; + validate_pycapsule(capsule, "datafusion_logical_extension_codec")?; + + let codec = unsafe { capsule.reference::() }; + + Ok(codec.clone()) + } + + +The DataFusion FFI interface updates no longer depend directly on the +``datafusion`` core crate. You can improve your build times and potentially +reduce your library binary size by removing this dependency and instead +using the specific datafusion project crates. + +For example, instead of including expressions like: + +.. code-block:: rust + + use datafusion::catalog::MemTable; + +Instead you can now write: + +.. code-block:: rust + + use datafusion_catalog::MemTable; diff --git a/examples/datafusion-ffi-example/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock index 148c05edd..383716d41 100644 --- a/examples/datafusion-ffi-example/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -64,7 +64,7 @@ checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "const-random", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "version_check", "zerocopy", @@ -72,9 +72,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -111,27 +111,15 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.99" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" - -[[package]] -name = "arrayref" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" - -[[package]] -name = "arrayvec" -version = "0.7.6" +version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "arrow" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" +checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" dependencies = [ "arrow-arith", "arrow-array", @@ -150,9 +138,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" +checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" dependencies = [ "arrow-array", "arrow-buffer", @@ -164,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" +checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" dependencies = [ "ahash", "arrow-buffer", @@ -175,7 +163,7 @@ dependencies = [ "chrono", "chrono-tz", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "num-complex", "num-integer", "num-traits", @@ -183,9 +171,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" +checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" dependencies = [ "bytes", "half", @@ -195,13 +183,14 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" +checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", + "arrow-ord", "arrow-schema", "arrow-select", "atoi", @@ -216,9 +205,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" +checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" dependencies = [ "arrow-array", "arrow-cast", @@ -231,9 +220,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" +checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" dependencies = [ "arrow-buffer", "arrow-schema", @@ -244,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" +checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" dependencies = [ "arrow-array", "arrow-buffer", @@ -255,14 +244,13 @@ dependencies = [ "arrow-select", "flatbuffers", "lz4_flex", - "zstd", ] [[package]] name = "arrow-json" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" +checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" dependencies = [ "arrow-array", "arrow-buffer", @@ -284,9 +272,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" +checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" dependencies = [ "arrow-array", "arrow-buffer", @@ -297,9 +285,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" +checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" dependencies = [ "arrow-array", "arrow-buffer", @@ -310,20 +298,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" +checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" dependencies = [ "bitflags", - "serde_core", - "serde_json", ] [[package]] name = "arrow-select" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" +checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" dependencies = [ "ahash", "arrow-array", @@ -335,9 +321,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" +checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" dependencies = [ "arrow-array", "arrow-buffer", @@ -362,23 +348,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "async-compression" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" -dependencies = [ - "bzip2 0.5.2", - "flate2", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", -] - [[package]] name = "async-ffi" version = "0.5.0" @@ -396,7 +365,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -420,55 +389,11 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "bigdecimal" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" -dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "bitflags" -version = "2.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" - -[[package]] -name = "blake2" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", -] - -[[package]] -name = "block-buffer" -version = "0.10.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "brotli" @@ -493,9 +418,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" [[package]] name = "byteorder" @@ -505,44 +430,17 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" - -[[package]] -name = "bzip2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" -dependencies = [ - "bzip2-sys", -] - -[[package]] -name = "bzip2" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" -dependencies = [ - "libbz2-rs-sys", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] +checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" [[package]] name = "cc" -version = "1.2.34" +version = "1.2.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", @@ -550,9 +448,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" @@ -562,7 +460,7 @@ checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" dependencies = [ "iana-time-zone", "num-traits", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -577,12 +475,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" dependencies = [ - "strum", - "strum_macros", + "unicode-segmentation", "unicode-width", ] @@ -608,19 +505,13 @@ dependencies = [ [[package]] name = "const_panic" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb8a602185c3c95b52f86dc78e55a6df9a287a7a93ddbcf012509930880cf879" +checksum = "e262cdaac42494e3ae34c43969f9cdeb7da178bdb4b66fa6a1ea2edb4c8ae652" dependencies = [ "typewit", ] -[[package]] -name = "constant_time_eq" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" - [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -642,15 +533,6 @@ version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "533d38ecd2709b7608fb8e18e4504deb99e9a72879e6aa66373a76d8dc4259ea" -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - [[package]] name = "crc32fast" version = "1.5.0" @@ -681,33 +563,23 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "csv" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde_core", ] [[package]] name = "csv-core" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] @@ -726,67 +598,11 @@ dependencies = [ "parking_lot_core", ] -[[package]] -name = "datafusion" -version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8" -dependencies = [ - "arrow", - "arrow-schema", - "async-trait", - "bytes", - "bzip2 0.6.1", - "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-datasource-arrow", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", - "flate2", - "futures", - "itertools", - "log", - "object_store", - "parking_lot", - "parquet", - "rand", - "regex", - "rstest", - "sqlparser", - "tempfile", - "tokio", - "url", - "uuid", - "xz2", - "zstd", -] - [[package]] name = "datafusion-catalog" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d" +checksum = "f3e91b2603f906cf8cb8be84ba4e34f9d8fe6dbdfdd6916d55f22317074d1fdf" dependencies = [ "arrow", "async-trait", @@ -809,9 +625,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0" +checksum = "919d20cdebddee4d8dca651aa0291a44c8104824d1ac288996a325c319ce31ba" dependencies = [ "arrow", "async-trait", @@ -828,38 +644,35 @@ dependencies = [ "itertools", "log", "object_store", - "tokio", ] [[package]] name = "datafusion-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0" +checksum = "31ff2c4e95be40ad954de93862167b165a6fb49248bb882dea8aef4f888bc767" dependencies = [ "ahash", "arrow", "arrow-ipc", "chrono", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap", "libc", "log", "object_store", "parquet", "paste", - "recursive", - "sqlparser", "tokio", "web-time", ] [[package]] name = "datafusion-common-runtime" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95" +checksum = "0dd9f820fe58c2600b6c33a14432228dbaaf233b96c83a1fd61f16d073d5c3c5" dependencies = [ "futures", "log", @@ -868,15 +681,13 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6" +checksum = "86b32b7b12645805d20b70aba6ba846cd262d7b073f7f617640c3294af108d44" dependencies = [ "arrow", - "async-compression", "async-trait", "bytes", - "bzip2 0.6.1", "chrono", "datafusion-common", "datafusion-common-runtime", @@ -887,7 +698,6 @@ dependencies = [ "datafusion-physical-expr-common", "datafusion-physical-plan", "datafusion-session", - "flate2", "futures", "glob", "itertools", @@ -895,17 +705,14 @@ dependencies = [ "object_store", "rand", "tokio", - "tokio-util", "url", - "xz2", - "zstd", ] [[package]] name = "datafusion-datasource-arrow" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea" +checksum = "597695c8ebb723ee927b286139d43a3fbed6de7ad9210bd1a9fed5c721ac6fb1" dependencies = [ "arrow", "arrow-ipc", @@ -927,9 +734,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568" +checksum = "6bb493d07d8da6d00a89ea9cc3e74a56795076d9faed5ac30284bd9ef37929e9" dependencies = [ "arrow", "async-trait", @@ -950,9 +757,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85" +checksum = "5e9806521c4d3632f53b9a664041813c267c670232efa1452ef29faee71c3749" dependencies = [ "arrow", "async-trait", @@ -972,9 +779,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9" +checksum = "f6a3ccd48d5034f8461f522114d0e46dfb3a9f0ce01a4d53a721024ace95d60d" dependencies = [ "arrow", "async-trait", @@ -1002,18 +809,19 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07" +checksum = "ff69a18418e9878d4840f35e2ad7f2a6386beedf192e9f065e628a7295ff5fbf" [[package]] name = "datafusion-execution" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178" +checksum = "ccbc5e469b35d87c0b115327be83d68356ef9154684d32566315b5c071577e23" dependencies = [ "arrow", "async-trait", + "chrono", "dashmap", "datafusion-common", "datafusion-expr", @@ -1028,9 +836,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91" +checksum = "81ed3c02a3faf4e09356d5a314471703f440f0a6a14ca6addaf6cfb44ab14de5" dependencies = [ "arrow", "async-trait", @@ -1044,16 +852,15 @@ dependencies = [ "indexmap", "itertools", "paste", - "recursive", "serde_json", "sqlparser", ] [[package]] name = "datafusion-expr-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc" +checksum = "1567e60d21c372ca766dc9dde98efabe2b06d98f008d988fed00d93546bf5be7" dependencies = [ "arrow", "datafusion-common", @@ -1064,20 +871,27 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec510e7787641279b0336e8b79e4b7bd1385d5976875ff9b97f4269ce5231a67" +checksum = "f3ec8492849520101bee44489da8c49f2df3c34d55fd6e502f8083e66a30cdff" dependencies = [ "abi_stable", "arrow", "arrow-schema", "async-ffi", "async-trait", - "datafusion", + "datafusion-catalog", "datafusion-common", + "datafusion-datasource", + "datafusion-execution", + "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", "datafusion-proto", "datafusion-proto-common", + "datafusion-session", "futures", "log", "prost", @@ -1093,24 +907,27 @@ dependencies = [ "arrow-array", "arrow-schema", "async-trait", - "datafusion", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", "datafusion-ffi", + "datafusion-functions-aggregate", + "datafusion-functions-window", "pyo3", "pyo3-build-config", ] [[package]] name = "datafusion-functions" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f" +checksum = "c4593538abd95c27eeeb2f86b7ad827cce07d0c474eae9b122f4f9675f8c20ad" dependencies = [ "arrow", "arrow-buffer", "base64", - "blake2", - "blake3", "chrono", + "chrono-tz", "datafusion-common", "datafusion-doc", "datafusion-execution", @@ -1120,20 +937,18 @@ dependencies = [ "hex", "itertools", "log", - "md-5", "num-traits", "rand", "regex", - "sha2", "unicode-segmentation", "uuid", ] [[package]] name = "datafusion-functions-aggregate" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305" +checksum = "f81cdf609f43cd26156934fd81beb7215d60dda40a776c2e1b83d73df69434f2" dependencies = [ "ahash", "arrow", @@ -1152,9 +967,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d" +checksum = "9173f1bcea2ede4a5c23630a48469f06c9db9a408eb5fd140d1ff9a5e0c40ebf" dependencies = [ "ahash", "arrow", @@ -1163,34 +978,11 @@ dependencies = [ "datafusion-physical-expr-common", ] -[[package]] -name = "datafusion-functions-nested" -version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4" -dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr-common", - "itertools", - "log", - "paste", -] - [[package]] name = "datafusion-functions-table" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5" +checksum = "57a29e8a6201b3b9fb2be17d88e287c6d427948d64220cd5ea72ced614a1aee5" dependencies = [ "arrow", "async-trait", @@ -1204,9 +996,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1" +checksum = "cd412754964a31c515e5a814e5ce0edaf30f0ea975f3691e800eff115ee76dfb" dependencies = [ "arrow", "datafusion-common", @@ -1222,9 +1014,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc" +checksum = "d49be73a5ac0797398927a543118bd68e58e80bf95ebdabc77336bcd9c38a711" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1232,40 +1024,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848" +checksum = "439ff5489dcac4d34ed7a49a93310c3345018c4469e34726fa471cdda725346d" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.111", -] - -[[package]] -name = "datafusion-optimizer" -version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a" -dependencies = [ - "arrow", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "indexmap", - "itertools", - "log", - "recursive", - "regex", - "regex-syntax", + "syn 2.0.114", ] [[package]] name = "datafusion-physical-expr" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d" +checksum = "83480008f66691a0047c5a88990bd76b7c1117dd8a49ca79959e214948b81f0a" dependencies = [ "ahash", "arrow", @@ -1275,19 +1047,20 @@ dependencies = [ "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap", "itertools", "parking_lot", "paste", "petgraph", + "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768" +checksum = "6b438306446646b359666a658cc29d5494b1e9873bc7a57707689760666fc82c" dependencies = [ "arrow", "datafusion-common", @@ -1300,60 +1073,44 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03" +checksum = "95b1fbf739038e0b313473588331c5bf79985d1b842b9937c1f10b170665cae1" dependencies = [ "ahash", "arrow", + "chrono", "datafusion-common", "datafusion-expr-common", - "hashbrown 0.14.5", - "itertools", -] - -[[package]] -name = "datafusion-physical-optimizer" -version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", + "hashbrown 0.16.1", + "indexmap", "itertools", - "recursive", + "parking_lot", ] [[package]] name = "datafusion-physical-plan" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac" +checksum = "a616a72b4ddf550652b36d5a7c0386eac4accea3ffc6c29a7b16c45f237e9882" dependencies = [ "ahash", "arrow", "arrow-ord", "arrow-schema", "async-trait", - "chrono", "datafusion-common", "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", + "datafusion-functions", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", - "hashbrown 0.14.5", + "hashbrown 0.16.1", "indexmap", "itertools", "log", @@ -1364,9 +1121,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d368093a98a17d1449b1083ac22ed16b7128e4c67789991869480d8c4a40ecb9" +checksum = "a0a96a57f60a53041c6d275dc46313bbe34f07ed51c4cea3557b321d4f3da7e0" dependencies = [ "arrow", "chrono", @@ -1391,9 +1148,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b6aef3d5e5c1d2bc3114c4876730cb76a9bdc5a8df31ef1b6db48f0c1671895" +checksum = "38f4c54f4a47608d2ef5af2fbaca9975cac9e48b93acf41b2ccf584405a3da5e" dependencies = [ "arrow", "datafusion-common", @@ -1402,9 +1159,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db" +checksum = "4bf4b50be3ab65650452993eda4baf81edb245fb039b8714476b0f4c8801a527" dependencies = [ "arrow", "datafusion-common", @@ -1419,9 +1176,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "51.0.0" +version = "52.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776" +checksum = "66e080e2c105284460580c18e751b2133cc306df298181e4349b5b134632811a" dependencies = [ "async-trait", "datafusion-common", @@ -1431,35 +1188,6 @@ dependencies = [ "parking_lot", ] -[[package]] -name = "datafusion-sql" -version = "51.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f" -dependencies = [ - "arrow", - "bigdecimal", - "chrono", - "datafusion-common", - "datafusion-expr", - "indexmap", - "log", - "recursive", - "regex", - "sqlparser", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - [[package]] name = "displaydoc" version = "0.2.5" @@ -1468,7 +1196,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1485,12 +1213,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys", ] [[package]] @@ -1499,6 +1227,12 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "find-msvc-tools" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" + [[package]] name = "fixedbitset" version = "0.5.7" @@ -1507,9 +1241,9 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.2.10" +version = "25.12.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" dependencies = [ "bitflags", "rustc_version", @@ -1527,16 +1261,16 @@ dependencies = [ ] [[package]] -name = "fnv" -version = "1.0.7" +name = "foldhash" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "foldhash" -version = "0.1.5" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" [[package]] name = "form_urlencoded" @@ -1603,7 +1337,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -1618,12 +1352,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" -[[package]] -name = "futures-timer" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" - [[package]] name = "futures-util" version = "0.3.31" @@ -1651,16 +1379,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.16" @@ -1669,19 +1387,19 @@ checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", - "wasi 0.11.1+wasi-snapshot-preview1", + "wasi", ] [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi 0.14.2+wasi-0.2.4", + "wasip2", ] [[package]] @@ -1707,10 +1425,6 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "hashbrown" @@ -1718,14 +1432,19 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "foldhash", + "foldhash 0.1.5", ] [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heck" @@ -1741,26 +1460,25 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] [[package]] name = "humantime" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1782,9 +1500,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -1795,9 +1513,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -1808,11 +1526,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -1823,42 +1540,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -1889,19 +1602,22 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown 0.16.1", ] [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "integer-encoding" @@ -1920,9 +1636,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" [[package]] name = "jobserver" @@ -1930,15 +1646,15 @@ version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "libc", ] [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -1946,9 +1662,9 @@ dependencies = [ [[package]] name = "lexical-core" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -1959,66 +1675,53 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" dependencies = [ "lexical-parse-integer", "lexical-util", - "static_assertions", ] [[package]] name = "lexical-parse-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" dependencies = [ "lexical-util", - "static_assertions", ] [[package]] name = "lexical-util" -version = "1.0.6" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" -dependencies = [ - "static_assertions", -] +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" [[package]] name = "lexical-write-float" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361" dependencies = [ "lexical-util", "lexical-write-integer", - "static_assertions", ] [[package]] name = "lexical-write-integer" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df" dependencies = [ "lexical-util", - "static_assertions", ] -[[package]] -name = "libbz2-rs-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" - [[package]] name = "libc" -version = "0.2.177" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libloading" @@ -2038,76 +1741,54 @@ checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" [[package]] name = "libz-rs-sys" -version = "0.5.1" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" dependencies = [ "zlib-rs", ] [[package]] name = "linux-raw-sys" -version = "0.9.4" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.27" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lz4_flex" -version = "0.11.5" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" dependencies = [ "twox-hash", ] -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memoffset" @@ -2207,9 +1888,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -2217,22 +1898,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link", ] [[package]] name = "parquet" -version = "57.0.0" +version = "57.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" +checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" dependencies = [ "ahash", "arrow-array", @@ -2249,7 +1930,7 @@ dependencies = [ "flate2", "futures", "half", - "hashbrown 0.16.0", + "hashbrown 0.16.1", "lz4_flex", "num-bigint", "num-integer", @@ -2327,15 +2008,15 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" [[package]] name = "potential_utf" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -2349,20 +2030,11 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "proc-macro-crate" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" -dependencies = [ - "toml_edit", -] - [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" dependencies = [ "unicode-ident", ] @@ -2387,23 +2059,14 @@ dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.111", -] - -[[package]] -name = "psm" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" -dependencies = [ - "cc", + "syn 2.0.114", ] [[package]] name = "pyo3" -version = "0.27.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37a6df7eab65fc7bee654a421404947e10a0f7085b6951bf2ea395f4659fb0cf" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "indoc", "libc", @@ -2418,18 +2081,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.27.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f77d387774f6f6eec64a004eac0ed525aab7fa1966d94b42f743797b3e395afb" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.27.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dd13844a4242793e02df3e2ec093f540d948299a6a77ea9ce7afd8623f542be" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -2437,34 +2100,34 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.27.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf8f9f1108270b90d3676b8679586385430e5c0bb78bb5f043f95499c821a71" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "pyo3-macros-backend" -version = "0.27.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a3b2274450ba5288bc9b8c1b69ff569d1d61189d4bff38f8d22e03d17f932b" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "quote" -version = "1.0.42" +version = "1.0.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" dependencies = [ "proc-macro2", ] @@ -2501,34 +2164,14 @@ version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.3", -] - -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.111", + "getrandom 0.3.4", ] [[package]] name = "redox_syscall" -version = "0.5.17" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] @@ -2558,15 +2201,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" - -[[package]] -name = "relative-path" -version = "1.9.3" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "repr_offset" @@ -2577,35 +2214,6 @@ dependencies = [ "tstr", ] -[[package]] -name = "rstest" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" -dependencies = [ - "futures-timer", - "futures-util", - "rstest_macros", -] - -[[package]] -name = "rstest_macros" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" -dependencies = [ - "cfg-if", - "glob", - "proc-macro-crate", - "proc-macro2", - "quote", - "regex", - "relative-path", - "rustc_version", - "syn 2.0.111", - "unicode-ident", -] - [[package]] name = "rustc_version" version = "0.4.1" @@ -2617,15 +2225,15 @@ dependencies = [ [[package]] name = "rustix" -version = "1.0.8" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.60.2", + "windows-sys", ] [[package]] @@ -2636,9 +2244,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "a50f4cf475b65d88e057964e0e9bb1f0aa9bbb2036dc65c64596b42932536984" [[package]] name = "same-file" @@ -2694,30 +2302,20 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "serde_json" -version = "1.0.143" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", -] - -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "serde_core", + "zmij", ] [[package]] @@ -2728,9 +2326,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simdutf8" @@ -2769,7 +2367,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f" dependencies = [ "log", - "recursive", "sqlparser_derive", ] @@ -2781,58 +2378,14 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "stable_deref_trait" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" - -[[package]] -name = "stacker" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.111", -] - -[[package]] -name = "subtle" -version = "2.6.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "syn" @@ -2847,9 +2400,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.111" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -2864,46 +2417,46 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "target-lexicon" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" +checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" [[package]] name = "tempfile" -version = "3.21.0" +version = "3.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", - "getrandom 0.3.3", + "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.60.2", + "windows-sys", ] [[package]] name = "thiserror" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.16" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -2928,9 +2481,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -2938,9 +2491,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ "bytes", "pin-project-lite", @@ -2955,57 +2508,14 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", -] - -[[package]] -name = "tokio-util" -version = "0.7.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "toml_datetime" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_edit" -version = "0.23.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" -dependencies = [ - "indexmap", - "toml_datetime", - "toml_parser", - "winnow", -] - -[[package]] -name = "toml_parser" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" -dependencies = [ - "winnow", + "syn 2.0.114", ] [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -3014,20 +2524,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", ] @@ -3049,9 +2559,9 @@ checksum = "e78122066b0cb818b8afd08f7ed22f7fdbc3e90815035726f0840d0d26c0747a" [[package]] name = "twox-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" [[package]] name = "typed-arena" @@ -3059,23 +2569,17 @@ version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" -[[package]] -name = "typenum" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" - [[package]] name = "typewit" -version = "1.13.0" +version = "1.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dd91acc53c592cb800c11c83e8e7ee1d48378d05cfa33b5474f5f80c5b236bf" +checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-segmentation" @@ -3085,9 +2589,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unindent" @@ -3097,9 +2601,9 @@ checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -3115,11 +2619,11 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f33196643e165781c20a5ead5582283a7dacbb87855d867fbc2df3f81eddc1be" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ - "getrandom 0.3.3", + "getrandom 0.3.4", "js-sys", "wasm-bindgen", ] @@ -3147,45 +2651,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.111", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -3196,9 +2687,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -3206,31 +2697,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ + "bumpalo", "proc-macro2", "quote", - "syn 2.0.111", - "wasm-bindgen-backend", + "syn 2.0.114", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -3264,11 +2755,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.60.2", + "windows-sys", ] [[package]] @@ -3279,45 +2770,39 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.1.3", + "windows-link", "windows-result", "windows-strings", ] [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" @@ -3326,209 +2811,49 @@ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" -dependencies = [ - "windows-link 0.1.3", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-targets 0.52.6", + "windows-link", ] [[package]] name = "windows-sys" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" -dependencies = [ - "windows-targets 0.53.3", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm 0.52.6", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", -] - -[[package]] -name = "windows-targets" -version = "0.53.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" -dependencies = [ - "windows-link 0.1.3", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_i686_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.53.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" - -[[package]] -name = "winnow" -version = "0.7.13" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "memchr", + "windows-link", ] [[package]] -name = "wit-bindgen-rt" -version = "0.39.0" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" - -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -3536,34 +2861,34 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] @@ -3583,15 +2908,15 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", "synstructure", ] [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -3600,9 +2925,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -3611,20 +2936,26 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.111", + "syn 2.0.114", ] [[package]] name = "zlib-rs" -version = "0.5.1" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" + +[[package]] +name = "zmij" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" +checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" [[package]] name = "zstd" @@ -3646,9 +2977,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.15+zstd.1.5.7" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" +checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", diff --git a/examples/datafusion-ffi-example/Cargo.toml b/examples/datafusion-ffi-example/Cargo.toml index 8150156d6..e6708fce8 100644 --- a/examples/datafusion-ffi-example/Cargo.toml +++ b/examples/datafusion-ffi-example/Cargo.toml @@ -21,16 +21,21 @@ version = "0.2.0" edition = "2021" [dependencies] -datafusion = { version = "51" } -datafusion-ffi = { version = "51" } -pyo3 = { version = "0.27", features = ["extension-module", "abi3", "abi3-py310"] } +datafusion-catalog = { version = "52" , default-features = false } +datafusion-common = { version = "52" , default-features = false } +datafusion-functions-aggregate = { version = "52" } +datafusion-functions-window = { version = "52" } +datafusion-expr = { version = "52" } +datafusion-ffi = { version = "52" } + +pyo3 = { version = "0.26", features = ["extension-module", "abi3", "abi3-py39"] } arrow = { version = "57" } arrow-array = { version = "57" } arrow-schema = { version = "57" } -async-trait = "0.1.88" +async-trait = "0.1.89" [build-dependencies] -pyo3-build-config = "0.27" +pyo3-build-config = "0.26" [lib] name = "datafusion_ffi_example" diff --git a/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py b/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py index 1bf1bf136..b26e12085 100644 --- a/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py +++ b/examples/datafusion-ffi-example/python/tests/_test_catalog_provider.py @@ -18,43 +18,95 @@ from __future__ import annotations import pyarrow as pa -from datafusion import SessionContext +import pyarrow.dataset as ds +import pytest +from datafusion import SessionContext, Table +from datafusion.catalog import Schema from datafusion_ffi_example import MyCatalogProvider -def test_catalog_provider(): - ctx = SessionContext() +def create_test_dataset() -> Table: + """Create a simple test dataset.""" + batch = pa.RecordBatch.from_arrays( + [pa.array([100, 200, 300]), pa.array([1.1, 2.2, 3.3])], + names=["id", "value"], + ) + dataset = ds.dataset([batch]) + return Table(dataset) + - my_catalog_name = "my_catalog" - expected_schema_name = "my_schema" - expected_table_name = "my_table" - expected_table_columns = ["units", "price"] +@pytest.mark.parametrize("inner_capsule", [True, False]) +def test_ffi_catalog_provider_basic(inner_capsule: bool) -> None: + """Test basic FFI CatalogProvider functionality.""" + ctx = SessionContext() + # Register FFI catalog catalog_provider = MyCatalogProvider() - ctx.register_catalog_provider(my_catalog_name, catalog_provider) - my_catalog = ctx.catalog(my_catalog_name) - - my_catalog_schemas = my_catalog.names() - assert expected_schema_name in my_catalog_schemas - my_schema = my_catalog.schema(expected_schema_name) - assert expected_table_name in my_schema.names() - my_table = my_schema.table(expected_table_name) - assert expected_table_columns == my_table.schema.names - - result = ctx.table( - f"{my_catalog_name}.{expected_schema_name}.{expected_table_name}" - ).collect() + if inner_capsule: + catalog_provider = catalog_provider.__datafusion_catalog_provider__(ctx) + + ctx.register_catalog_provider("ffi_catalog", catalog_provider) + + # Verify the catalog exists + catalog = ctx.catalog("ffi_catalog") + schema_names = catalog.names() + assert "my_schema" in schema_names + + # Query the pre-populated table + result = ctx.sql("SELECT * FROM ffi_catalog.my_schema.my_table").collect() assert len(result) == 2 + assert result[0].num_columns == 2 + + +def test_ffi_catalog_provider_register_schema(): + """Test registering additional schemas to FFI CatalogProvider.""" + ctx = SessionContext() + + catalog_provider = MyCatalogProvider() + ctx.register_catalog_provider("ffi_catalog", catalog_provider) + + catalog = ctx.catalog("ffi_catalog") + + # Register a new memory schema + new_schema = Schema.memory_schema() + catalog.register_schema("additional_schema", new_schema) + + # Verify the schema was registered + assert "additional_schema" in catalog.names() + + # Add a table to the new schema + new_schema.register_table("new_table", create_test_dataset()) + + # Query the new table + result = ctx.sql("SELECT * FROM ffi_catalog.additional_schema.new_table").collect() + assert len(result) == 1 + assert result[0].column(0) == pa.array([100, 200, 300]) + + +def test_ffi_catalog_provider_deregister_schema(): + """Test deregistering schemas from FFI CatalogProvider.""" + ctx = SessionContext() + + catalog_provider = MyCatalogProvider() + ctx.register_catalog_provider("ffi_catalog", catalog_provider) + + catalog = ctx.catalog("ffi_catalog") + + # Register two schemas + schema1 = Schema.memory_schema() + schema2 = Schema.memory_schema() + catalog.register_schema("temp_schema1", schema1) + catalog.register_schema("temp_schema2", schema2) + + # Verify both exist + names = catalog.names() + assert "temp_schema1" in names + assert "temp_schema2" in names + + # Deregister one schema + catalog.deregister_schema("temp_schema1") - col0_result = [r.column(0) for r in result] - col1_result = [r.column(1) for r in result] - expected_col0 = [ - pa.array([10, 20, 30], type=pa.int32()), - pa.array([5, 7], type=pa.int32()), - ] - expected_col1 = [ - pa.array([1, 2, 5], type=pa.float64()), - pa.array([1.5, 2.5], type=pa.float64()), - ] - assert col0_result == expected_col0 - assert col1_result == expected_col1 + # Verify it's gone + names = catalog.names() + assert "temp_schema1" not in names + assert "temp_schema2" in names diff --git a/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py b/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py new file mode 100644 index 000000000..93449c660 --- /dev/null +++ b/examples/datafusion-ffi-example/python/tests/_test_schema_provider.py @@ -0,0 +1,232 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import pyarrow as pa +import pyarrow.dataset as ds +import pytest +from datafusion import SessionContext, Table +from datafusion.catalog import Schema +from datafusion_ffi_example import FixedSchemaProvider, MyCatalogProvider + + +def create_test_dataset() -> Table: + """Create a simple test dataset.""" + batch = pa.RecordBatch.from_arrays( + [pa.array([100, 200, 300]), pa.array([1.1, 2.2, 3.3])], + names=["id", "value"], + ) + dataset = ds.dataset([batch]) + return Table(dataset) + + +@pytest.mark.parametrize("inner_capsule", [True, False]) +def test_schema_provider_extract_values(inner_capsule: bool) -> None: + ctx = SessionContext() + + my_schema_name = "my_schema" + + schema_provider = FixedSchemaProvider() + if inner_capsule: + schema_provider = schema_provider.__datafusion_schema_provider__(ctx) + + ctx.catalog().register_schema(my_schema_name, schema_provider) + + expected_schema_name = "my_schema" + expected_table_name = "my_table" + expected_table_columns = ["units", "price"] + + default_catalog = ctx.catalog() + + catalog_schemas = default_catalog.names() + assert expected_schema_name in catalog_schemas + my_schema = default_catalog.schema(expected_schema_name) + assert expected_table_name in my_schema.names() + my_table = my_schema.table(expected_table_name) + assert expected_table_columns == my_table.schema.names + + result = ctx.table(f"{expected_schema_name}.{expected_table_name}").collect() + assert len(result) == 2 + + col0_result = [r.column(0) for r in result] + col1_result = [r.column(1) for r in result] + expected_col0 = [ + pa.array([10, 20, 30], type=pa.int32()), + pa.array([5, 7], type=pa.int32()), + ] + expected_col1 = [ + pa.array([1, 2, 5], type=pa.float64()), + pa.array([1.5, 2.5], type=pa.float64()), + ] + assert col0_result == expected_col0 + assert col1_result == expected_col1 + + +def test_ffi_schema_provider_basic(): + """Test basic FFI SchemaProvider functionality.""" + ctx = SessionContext() + + # Register FFI schema + schema_provider = FixedSchemaProvider() + ctx.catalog().register_schema("ffi_schema", schema_provider) + + # Verify the schema exists + schema = ctx.catalog().schema("ffi_schema") + table_names = schema.names() + assert "my_table" in table_names + + # Query the pre-populated table + result = ctx.sql("SELECT * FROM ffi_schema.my_table").collect() + assert len(result) == 2 + assert result[0].num_columns == 2 + + +def test_ffi_schema_provider_register_table(): + """Test registering additional tables to FFI SchemaProvider.""" + ctx = SessionContext() + + schema_provider = FixedSchemaProvider() + ctx.catalog().register_schema("ffi_schema", schema_provider) + + schema = ctx.catalog().schema("ffi_schema") + + # Register a new table + schema.register_table("additional_table", create_test_dataset()) + + # Verify the table was registered + assert "additional_table" in schema.names() + + # Query the new table + result = ctx.sql("SELECT * FROM ffi_schema.additional_table").collect() + assert len(result) == 1 + assert result[0].column(0) == pa.array([100, 200, 300]) + assert result[0].column(1) == pa.array([1.1, 2.2, 3.3]) + + +def test_ffi_schema_provider_deregister_table(): + """Test deregistering tables from FFI SchemaProvider.""" + ctx = SessionContext() + + schema_provider = FixedSchemaProvider() + ctx.catalog().register_schema("ffi_schema", schema_provider) + + schema = ctx.catalog().schema("ffi_schema") + + # Register two tables + schema.register_table("temp_table1", create_test_dataset()) + schema.register_table("temp_table2", create_test_dataset()) + + # Verify both exist + names = schema.names() + assert "temp_table1" in names + assert "temp_table2" in names + + # Deregister one table + schema.deregister_table("temp_table1") + + # Verify it's gone + names = schema.names() + assert "temp_table1" not in names + assert "temp_table2" in names + + +def test_mixed_ffi_and_python_providers(): + """Test mixing FFI and Python providers in the same catalog/schema.""" + ctx = SessionContext() + + # Register FFI catalog + ffi_catalog = MyCatalogProvider() + ctx.register_catalog_provider("ffi_catalog", ffi_catalog) + + # Register Python memory schema to FFI catalog + python_schema = Schema.memory_schema() + ctx.catalog("ffi_catalog").register_schema("python_schema", python_schema) + + # Add table to Python schema + python_schema.register_table("python_table", create_test_dataset()) + + # Query both FFI table and Python table + result_ffi = ctx.sql("SELECT * FROM ffi_catalog.my_schema.my_table").collect() + assert len(result_ffi) == 2 + + result_python = ctx.sql( + "SELECT * FROM ffi_catalog.python_schema.python_table" + ).collect() + assert len(result_python) == 1 + assert result_python[0].column(0) == pa.array([100, 200, 300]) + + +def test_ffi_catalog_with_multiple_schemas(): + """Test FFI catalog with multiple schemas of different types.""" + ctx = SessionContext() + + catalog_provider = MyCatalogProvider() + ctx.register_catalog_provider("multi_catalog", catalog_provider) + + catalog = ctx.catalog("multi_catalog") + + # Register different types of schemas + ffi_schema = FixedSchemaProvider() + memory_schema = Schema.memory_schema() + + catalog.register_schema("ffi_schema", ffi_schema) + catalog.register_schema("memory_schema", memory_schema) + + # Add tables to memory schema + memory_schema.register_table("mem_table", create_test_dataset()) + + # Verify all schemas exist + names = catalog.names() + assert "my_schema" in names # Pre-populated + assert "ffi_schema" in names + assert "memory_schema" in names + + # Query tables from each schema + result = ctx.sql("SELECT * FROM multi_catalog.my_schema.my_table").collect() + assert len(result) == 2 + + result = ctx.sql("SELECT * FROM multi_catalog.ffi_schema.my_table").collect() + assert len(result) == 2 + + result = ctx.sql("SELECT * FROM multi_catalog.memory_schema.mem_table").collect() + assert len(result) == 1 + assert result[0].column(0) == pa.array([100, 200, 300]) + + +def test_ffi_schema_table_exist(): + """Test table_exist method on FFI SchemaProvider.""" + ctx = SessionContext() + + schema_provider = FixedSchemaProvider() + ctx.catalog().register_schema("ffi_schema", schema_provider) + + schema = ctx.catalog().schema("ffi_schema") + + # Check pre-populated table + assert schema.table_exist("my_table") + + # Check non-existent table + assert not schema.table_exist("nonexistent_table") + + # Register a new table and check + schema.register_table("new_table", create_test_dataset()) + assert schema.table_exist("new_table") + + # Deregister and check + schema.deregister_table("new_table") + assert not schema.table_exist("new_table") diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_function.py b/examples/datafusion-ffi-example/python/tests/_test_table_function.py index 4b8b21454..bf5aae3bd 100644 --- a/examples/datafusion-ffi-example/python/tests/_test_table_function.py +++ b/examples/datafusion-ffi-example/python/tests/_test_table_function.py @@ -27,9 +27,10 @@ from datafusion.context import TableProviderExportable -def test_ffi_table_function_register(): +def test_ffi_table_function_register() -> None: ctx = SessionContext() table_func = MyTableFunction() + table_udtf = udtf(table_func, "my_table_func") ctx.register_udtf(table_udtf) result = ctx.sql("select * from my_table_func()").collect() diff --git a/examples/datafusion-ffi-example/python/tests/_test_table_provider.py b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py index 48feaff64..fc77d2d3b 100644 --- a/examples/datafusion-ffi-example/python/tests/_test_table_provider.py +++ b/examples/datafusion-ffi-example/python/tests/_test_table_provider.py @@ -18,13 +18,18 @@ from __future__ import annotations import pyarrow as pa +import pytest from datafusion import SessionContext from datafusion_ffi_example import MyTableProvider -def test_table_loading(): +@pytest.mark.parametrize("inner_capsule", [True, False]) +def test_table_provider_ffi(inner_capsule: bool) -> None: ctx = SessionContext() table = MyTableProvider(3, 2, 4) + if inner_capsule: + table = table.__datafusion_table_provider__(ctx) + ctx.register_table("t", table) result = ctx.table("t").collect() diff --git a/examples/datafusion-ffi-example/src/aggregate_udf.rs b/examples/datafusion-ffi-example/src/aggregate_udf.rs index bb7505f7f..4eeb99232 100644 --- a/examples/datafusion-ffi-example/src/aggregate_udf.rs +++ b/examples/datafusion-ffi-example/src/aggregate_udf.rs @@ -15,19 +15,20 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; +use std::sync::Arc; + use arrow_schema::DataType; -use datafusion::error::Result as DataFusionResult; -use datafusion::functions_aggregate::sum::Sum; -use datafusion::logical_expr::function::AccumulatorArgs; -use datafusion::logical_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature}; +use datafusion_common::error::Result as DataFusionResult; +use datafusion_expr::function::AccumulatorArgs; +use datafusion_expr::{Accumulator, AggregateUDF, AggregateUDFImpl, Signature}; use datafusion_ffi::udaf::FFI_AggregateUDF; +use datafusion_functions_aggregate::sum::Sum; use pyo3::types::PyCapsule; use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use std::any::Any; -use std::sync::Arc; #[pyclass(name = "MySumUDF", module = "datafusion_ffi_example", subclass)] -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Eq, PartialEq, Hash)] pub(crate) struct MySumUDF { inner: Arc, } @@ -35,10 +36,10 @@ pub(crate) struct MySumUDF { #[pymethods] impl MySumUDF { #[new] - fn new() -> Self { - Self { + fn new() -> PyResult { + Ok(Self { inner: Arc::new(Sum::new()), - } + }) } fn __datafusion_aggregate_udf__<'py>( diff --git a/examples/datafusion-ffi-example/src/catalog_provider.rs b/examples/datafusion-ffi-example/src/catalog_provider.rs index cd2616916..570222748 100644 --- a/examples/datafusion-ffi-example/src/catalog_provider.rs +++ b/examples/datafusion-ffi-example/src/catalog_provider.rs @@ -15,24 +15,27 @@ // specific language governing permissions and limitations // under the License. -use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use std::{any::Any, fmt::Debug, sync::Arc}; +use std::any::Any; +use std::fmt::Debug; +use std::sync::Arc; use arrow::datatypes::Schema; use async_trait::async_trait; -use datafusion::{ - catalog::{ - CatalogProvider, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, TableProvider, - }, - datasource::MemTable, - error::{DataFusionError, Result}, +use datafusion_catalog::{ + CatalogProvider, MemTable, MemoryCatalogProvider, MemorySchemaProvider, SchemaProvider, + TableProvider, }; +use datafusion_common::error::{DataFusionError, Result}; use datafusion_ffi::catalog_provider::FFI_CatalogProvider; +use datafusion_ffi::schema_provider::FFI_SchemaProvider; use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python}; + +use crate::utils::ffi_logical_codec_from_pycapsule; pub fn my_table() -> Arc { use arrow::datatypes::{DataType, Field}; - use datafusion::common::record_batch; + use datafusion_common::record_batch; let schema = Arc::new(Schema::new(vec![ Field::new("units", DataType::Int32, true), @@ -55,14 +58,19 @@ pub fn my_table() -> Arc { Arc::new(MemTable::try_new(schema, vec![partitions]).unwrap()) } +#[pyclass( + name = "FixedSchemaProvider", + module = "datafusion_ffi_example", + subclass +)] #[derive(Debug)] pub struct FixedSchemaProvider { - inner: MemorySchemaProvider, + inner: Arc, } impl Default for FixedSchemaProvider { fn default() -> Self { - let inner = MemorySchemaProvider::new(); + let inner = Arc::new(MemorySchemaProvider::new()); let table = my_table(); @@ -72,6 +80,29 @@ impl Default for FixedSchemaProvider { } } +#[pymethods] +impl FixedSchemaProvider { + #[new] + pub fn new() -> Self { + Self::default() + } + + pub fn __datafusion_schema_provider__<'py>( + &self, + py: Python<'py>, + session: Bound, + ) -> PyResult> { + let name = cr"datafusion_schema_provider".into(); + + let provider = Arc::clone(&self.inner) as Arc; + + let codec = ffi_logical_codec_from_pycapsule(session)?; + let provider = FFI_SchemaProvider::new_with_ffi_codec(provider, None, codec); + + PyCapsule::new(py, provider, Some(name)) + } +} + #[async_trait] impl SchemaProvider for FixedSchemaProvider { fn as_any(&self) -> &dyn Any { @@ -110,20 +141,9 @@ impl SchemaProvider for FixedSchemaProvider { module = "datafusion_ffi_example", subclass )] -#[derive(Debug)] +#[derive(Debug, Clone)] pub(crate) struct MyCatalogProvider { - inner: MemoryCatalogProvider, -} - -impl Default for MyCatalogProvider { - fn default() -> Self { - let inner = MemoryCatalogProvider::new(); - - let schema_name: &str = "my_schema"; - let _ = inner.register_schema(schema_name, Arc::new(FixedSchemaProvider::default())); - - Self { inner } - } + inner: Arc, } impl CatalogProvider for MyCatalogProvider { @@ -159,20 +179,27 @@ impl CatalogProvider for MyCatalogProvider { #[pymethods] impl MyCatalogProvider { #[new] - pub fn new() -> Self { - Self { - inner: Default::default(), - } + pub fn new() -> PyResult { + let inner = Arc::new(MemoryCatalogProvider::new()); + + let schema_name: &str = "my_schema"; + let _ = inner.register_schema(schema_name, Arc::new(FixedSchemaProvider::default())); + + Ok(Self { inner }) } pub fn __datafusion_catalog_provider__<'py>( &self, py: Python<'py>, + session: Bound, ) -> PyResult> { let name = cr"datafusion_catalog_provider".into(); - let catalog_provider = - FFI_CatalogProvider::new(Arc::new(MyCatalogProvider::default()), None); - PyCapsule::new(py, catalog_provider, Some(name)) + let provider = Arc::clone(&self.inner) as Arc; + + let codec = ffi_logical_codec_from_pycapsule(session)?; + let provider = FFI_CatalogProvider::new_with_ffi_codec(provider, None, codec); + + PyCapsule::new(py, provider, Some(name)) } } diff --git a/examples/datafusion-ffi-example/src/lib.rs b/examples/datafusion-ffi-example/src/lib.rs index f5f96cd49..005d8b80a 100644 --- a/examples/datafusion-ffi-example/src/lib.rs +++ b/examples/datafusion-ffi-example/src/lib.rs @@ -15,19 +15,21 @@ // specific language governing permissions and limitations // under the License. +use pyo3::prelude::*; + use crate::aggregate_udf::MySumUDF; -use crate::catalog_provider::MyCatalogProvider; +use crate::catalog_provider::{FixedSchemaProvider, MyCatalogProvider}; use crate::scalar_udf::IsNullUDF; use crate::table_function::MyTableFunction; use crate::table_provider::MyTableProvider; use crate::window_udf::MyRankUDF; -use pyo3::prelude::*; pub(crate) mod aggregate_udf; pub(crate) mod catalog_provider; pub(crate) mod scalar_udf; pub(crate) mod table_function; pub(crate) mod table_provider; +pub(crate) mod utils; pub(crate) mod window_udf; #[pymodule] @@ -35,6 +37,7 @@ fn datafusion_ffi_example(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/examples/datafusion-ffi-example/src/scalar_udf.rs b/examples/datafusion-ffi-example/src/scalar_udf.rs index 19b4e8b91..b3dd0591a 100644 --- a/examples/datafusion-ffi-example/src/scalar_udf.rs +++ b/examples/datafusion-ffi-example/src/scalar_udf.rs @@ -15,19 +15,20 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; +use std::sync::Arc; + use arrow_array::{Array, BooleanArray}; use arrow_schema::DataType; -use datafusion::common::ScalarValue; -use datafusion::error::Result as DataFusionResult; -use datafusion::logical_expr::{ +use datafusion_common::error::Result as DataFusionResult; +use datafusion_common::ScalarValue; +use datafusion_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, TypeSignature, Volatility, }; use datafusion_ffi::udf::FFI_ScalarUDF; use pyo3::types::PyCapsule; use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use std::any::Any; -use std::sync::Arc; #[pyclass(name = "IsNullUDF", module = "datafusion_ffi_example", subclass)] #[derive(Debug, Clone, PartialEq, Eq, Hash)] diff --git a/examples/datafusion-ffi-example/src/table_function.rs b/examples/datafusion-ffi-example/src/table_function.rs index 2d7b356e3..275759f68 100644 --- a/examples/datafusion-ffi-example/src/table_function.rs +++ b/examples/datafusion-ffi-example/src/table_function.rs @@ -15,14 +15,17 @@ // specific language governing permissions and limitations // under the License. -use crate::table_provider::MyTableProvider; -use datafusion::catalog::{TableFunctionImpl, TableProvider}; -use datafusion::error::Result as DataFusionResult; -use datafusion::prelude::Expr; +use std::sync::Arc; + +use datafusion_catalog::{TableFunctionImpl, TableProvider}; +use datafusion_common::error::Result as DataFusionResult; +use datafusion_expr::Expr; use datafusion_ffi::udtf::FFI_TableFunction; use pyo3::types::PyCapsule; -use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use std::sync::Arc; +use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python}; + +use crate::table_provider::MyTableProvider; +use crate::utils::ffi_logical_codec_from_pycapsule; #[pyclass(name = "MyTableFunction", module = "datafusion_ffi_example", subclass)] #[derive(Debug, Clone)] @@ -38,11 +41,13 @@ impl MyTableFunction { fn __datafusion_table_function__<'py>( &self, py: Python<'py>, + session: Bound, ) -> PyResult> { let name = cr"datafusion_table_function".into(); let func = self.clone(); - let provider = FFI_TableFunction::new(Arc::new(func), None); + let codec = ffi_logical_codec_from_pycapsule(session)?; + let provider = FFI_TableFunction::new_with_ffi_codec(Arc::new(func), None, codec); PyCapsule::new(py, provider, Some(name)) } diff --git a/examples/datafusion-ffi-example/src/table_provider.rs b/examples/datafusion-ffi-example/src/table_provider.rs index e884585b5..a7138b054 100644 --- a/examples/datafusion-ffi-example/src/table_provider.rs +++ b/examples/datafusion-ffi-example/src/table_provider.rs @@ -15,15 +15,18 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + use arrow_array::{ArrayRef, RecordBatch}; use arrow_schema::{DataType, Field, Schema}; -use datafusion::catalog::MemTable; -use datafusion::error::{DataFusionError, Result as DataFusionResult}; +use datafusion_catalog::MemTable; +use datafusion_common::error::{DataFusionError, Result as DataFusionResult}; use datafusion_ffi::table_provider::FFI_TableProvider; use pyo3::exceptions::PyRuntimeError; use pyo3::types::PyCapsule; -use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use std::sync::Arc; +use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python}; + +use crate::utils::ffi_logical_codec_from_pycapsule; /// In order to provide a test that demonstrates different sized record batches, /// the first batch will have num_rows, the second batch num_rows+1, and so on. @@ -90,13 +93,17 @@ impl MyTableProvider { pub fn __datafusion_table_provider__<'py>( &self, py: Python<'py>, + session: Bound, ) -> PyResult> { let name = cr"datafusion_table_provider".into(); let provider = self .create_table() - .map_err(|e| PyRuntimeError::new_err(e.to_string()))?; - let provider = FFI_TableProvider::new(Arc::new(provider), false, None); + .map_err(|e: DataFusionError| PyRuntimeError::new_err(e.to_string()))?; + + let codec = ffi_logical_codec_from_pycapsule(session)?; + let provider = + FFI_TableProvider::new_with_ffi_codec(Arc::new(provider), false, None, codec); PyCapsule::new(py, provider, Some(name)) } diff --git a/examples/datafusion-ffi-example/src/utils.rs b/examples/datafusion-ffi-example/src/utils.rs new file mode 100644 index 000000000..d06dc2d6c --- /dev/null +++ b/examples/datafusion-ffi-example/src/utils.rs @@ -0,0 +1,41 @@ +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods}; +use pyo3::types::PyCapsule; +use pyo3::{Bound, PyAny, PyResult}; + +pub(crate) fn ffi_logical_codec_from_pycapsule( + obj: Bound, +) -> PyResult { + let attr_name = "__datafusion_logical_extension_codec__"; + let capsule = if obj.hasattr(attr_name)? { + obj.getattr(attr_name)?.call0()? + } else { + obj + }; + + let capsule = capsule.downcast::()?; + validate_pycapsule(capsule, "datafusion_logical_extension_codec")?; + + let codec = unsafe { capsule.reference::() }; + + Ok(codec.clone()) +} + +pub(crate) fn validate_pycapsule(capsule: &Bound, name: &str) -> PyResult<()> { + let capsule_name = capsule.name()?; + if capsule_name.is_none() { + return Err(PyValueError::new_err(format!( + "Expected {name} PyCapsule to have name set." + ))); + } + + let capsule_name = capsule_name.unwrap().to_str()?; + if capsule_name != name { + return Err(PyValueError::new_err(format!( + "Expected name '{name}' in PyCapsule, instead got '{capsule_name}'" + ))); + } + + Ok(()) +} diff --git a/examples/datafusion-ffi-example/src/window_udf.rs b/examples/datafusion-ffi-example/src/window_udf.rs index 0ec8c7c2d..187ea6846 100644 --- a/examples/datafusion-ffi-example/src/window_udf.rs +++ b/examples/datafusion-ffi-example/src/window_udf.rs @@ -15,19 +15,20 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; +use std::sync::Arc; + use arrow_schema::{DataType, FieldRef}; -use datafusion::error::Result as DataFusionResult; -use datafusion::functions_window::rank::rank_udwf; -use datafusion::logical_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; -use datafusion::logical_expr::{PartitionEvaluator, Signature, WindowUDF, WindowUDFImpl}; +use datafusion_common::error::Result as DataFusionResult; +use datafusion_expr::function::{PartitionEvaluatorArgs, WindowUDFFieldArgs}; +use datafusion_expr::{PartitionEvaluator, Signature, WindowUDF, WindowUDFImpl}; use datafusion_ffi::udwf::FFI_WindowUDF; +use datafusion_functions_window::rank::rank_udwf; use pyo3::types::PyCapsule; use pyo3::{pyclass, pymethods, Bound, PyResult, Python}; -use std::any::Any; -use std::sync::Arc; #[pyclass(name = "MyRankUDF", module = "datafusion_ffi_example", subclass)] -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Eq, PartialEq, Hash)] pub(crate) struct MyRankUDF { inner: Arc, } @@ -35,8 +36,8 @@ pub(crate) struct MyRankUDF { #[pymethods] impl MyRankUDF { #[new] - fn new() -> Self { - Self { inner: rank_udwf() } + fn new() -> PyResult { + Ok(Self { inner: rank_udwf() }) } fn __datafusion_window_udf__<'py>(&self, py: Python<'py>) -> PyResult> { diff --git a/examples/tpch/_tests.py b/examples/tpch/_tests.py index 80ff80244..780fcf5e5 100644 --- a/examples/tpch/_tests.py +++ b/examples/tpch/_tests.py @@ -25,8 +25,10 @@ def df_selection(col_name, col_type): - if col_type == pa.float64() or isinstance(col_type, pa.Decimal128Type): + if col_type == pa.float64(): return F.round(col(col_name), lit(2)).alias(col_name) + if isinstance(col_type, pa.Decimal128Type): + return F.round(col(col_name).cast(pa.float64()), lit(2)).alias(col_name) if col_type == pa.string() or col_type == pa.string_view(): return F.trim(col(col_name)).alias(col_name) return col(col_name) diff --git a/python/datafusion/catalog.py b/python/datafusion/catalog.py index da54d233d..16c3ccc2a 100644 --- a/python/datafusion/catalog.py +++ b/python/datafusion/catalog.py @@ -27,7 +27,7 @@ if TYPE_CHECKING: import pyarrow as pa - from datafusion import DataFrame + from datafusion import DataFrame, SessionContext from datafusion.context import TableProviderExportable try: @@ -65,9 +65,9 @@ def schema_names(self) -> set[str]: return self.catalog.schema_names() @staticmethod - def memory_catalog() -> Catalog: + def memory_catalog(ctx: SessionContext | None = None) -> Catalog: """Create an in-memory catalog provider.""" - catalog = df_internal.catalog.RawCatalog.memory_catalog() + catalog = df_internal.catalog.RawCatalog.memory_catalog(ctx) return Catalog(catalog) def schema(self, name: str = "public") -> Schema: @@ -112,9 +112,9 @@ def __repr__(self) -> str: return self._raw_schema.__repr__() @staticmethod - def memory_schema() -> Schema: + def memory_schema(ctx: SessionContext | None = None) -> Schema: """Create an in-memory schema provider.""" - schema = df_internal.catalog.RawSchema.memory_schema() + schema = df_internal.catalog.RawSchema.memory_schema(ctx) return Schema(schema) def names(self) -> set[str]: @@ -141,6 +141,10 @@ def deregister_table(self, name: str) -> None: """Deregister a table provider from this schema.""" return self._raw_schema.deregister_table(name) + def table_exist(self, name: str) -> bool: + """Determines if a table exists in this schema.""" + return self._raw_schema.table_exist(name) + @deprecated("Use `Schema` instead.") class Database(Schema): @@ -163,10 +167,12 @@ class Table: __slots__ = ("_inner",) def __init__( - self, table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset + self, + table: Table | TableProviderExportable | DataFrame | pa.dataset.Dataset, + ctx: SessionContext | None = None, ) -> None: """Constructor.""" - self._inner = df_internal.catalog.RawTable(table) + self._inner = df_internal.catalog.RawTable(table, ctx) def __repr__(self) -> str: """Print a string representation of the table.""" @@ -271,4 +277,4 @@ class SchemaProviderExportable(Protocol): https://docs.rs/datafusion/latest/datafusion/catalog/trait.SchemaProvider.html """ - def __datafusion_schema_provider__(self) -> object: ... + def __datafusion_schema_provider__(self, session: Any) -> object: ... diff --git a/python/datafusion/context.py b/python/datafusion/context.py index 7dc06eb17..be647feff 100644 --- a/python/datafusion/context.py +++ b/python/datafusion/context.py @@ -88,7 +88,7 @@ class TableProviderExportable(Protocol): https://datafusion.apache.org/python/user-guide/io/table_provider.html """ - def __datafusion_table_provider__(self) -> object: ... # noqa: D105 + def __datafusion_table_provider__(self, session: Any) -> object: ... # noqa: D105 class CatalogProviderExportable(Protocol): @@ -97,7 +97,7 @@ class CatalogProviderExportable(Protocol): https://docs.rs/datafusion/latest/datafusion/catalog/trait.CatalogProvider.html """ - def __datafusion_catalog_provider__(self) -> object: ... # noqa: D105 + def __datafusion_catalog_provider__(self, session: Any) -> object: ... # noqa: D105 class SessionConfig: @@ -1301,3 +1301,19 @@ def _convert_table_partition_cols( ) return converted_table_partition_cols + + def __datafusion_task_context_provider__(self) -> Any: + """Access the PyCapsule FFI_TaskContextProvider.""" + return self.ctx.__datafusion_task_context_provider__() + + def __datafusion_logical_extension_codec__(self) -> Any: + """Access the PyCapsule FFI_LogicalExtensionCodec.""" + return self.ctx.__datafusion_logical_extension_codec__() + + def with_logical_extension_codec(self, codec: Any) -> SessionContext: + """Create a new session context with specified codec. + + This only supports codecs that have been implemented using the + FFI interface. + """ + return self.ctx.with_logical_extension_codec(codec) diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py index 43a72c805..e6543e767 100644 --- a/python/datafusion/user_defined.py +++ b/python/datafusion/user_defined.py @@ -27,6 +27,7 @@ import pyarrow as pa import datafusion._internal as df_internal +from datafusion import SessionContext from datafusion.expr import Expr if TYPE_CHECKING: @@ -923,16 +924,14 @@ class TableFunction: """ def __init__( - self, - name: str, - func: Callable[[], any], + self, name: str, func: Callable[[], any], ctx: SessionContext | None = None ) -> None: """Instantiate a user-defined table function (UDTF). See :py:func:`udtf` for a convenience function and argument descriptions. """ - self._udtf = df_internal.TableFunction(name, func) + self._udtf = df_internal.TableFunction(name, func, ctx) def __call__(self, *args: Expr) -> Any: """Execute the UDTF and return a table provider.""" diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 30f9ab903..53a661969 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -1666,7 +1666,6 @@ def test_execution_plan(aggregate_df): # indent plan will be different for everyone due to absolute path # to filename, so we just check for some expected content assert "AggregateExec:" in indent - assert "CoalesceBatchesExec:" in indent assert "RepartitionExec:" in indent assert "DataSourceExec:" in indent assert "file_type=csv" in indent @@ -2435,9 +2434,7 @@ def test_write_parquet_with_options_writer_version( @pytest.mark.parametrize("writer_version", ["1.2.3", "custom-version", "0"]) def test_write_parquet_with_options_wrong_writer_version(df, tmp_path, writer_version): """Test that invalid writer versions in Parquet throw an exception.""" - with pytest.raises( - Exception, match="Unknown or unsupported parquet writer version" - ): + with pytest.raises(Exception, match="Invalid parquet writer version"): df.write_parquet_with_options( tmp_path, ParquetWriterOptions(writer_version=writer_version) ) diff --git a/python/tests/test_udtf.py b/python/tests/test_udtf.py new file mode 100644 index 000000000..dba46e61a --- /dev/null +++ b/python/tests/test_udtf.py @@ -0,0 +1,119 @@ +import pyarrow as pa +import pyarrow.dataset as ds +from datafusion import Expr, SessionContext, Table, udtf +from datafusion.context import TableProviderExportable + + +def python_table_function_inner( + num_cols: int, num_rows: int, num_batches: int +) -> Table: + rows = list(range(num_rows)) + cols = [pa.array(rows) for _ in range(num_cols)] + names = [f"row_{i}" for i in range(num_cols)] + + batch = pa.RecordBatch.from_arrays(cols, names=names) + batches = [batch for _ in range(num_batches)] + dataset = ds.dataset(batches) + + return Table(dataset) + + +def test_python_table_function_with_args_class() -> None: + """Test Python TableFunction using a class with arguments.""" + ctx = SessionContext() + + class ParameterizedTableFunction: + """Table function that takes parameters.""" + + def __call__(self, num_cols: Expr, num_rows: Expr, num_batches: Expr) -> Table: + return python_table_function_inner( + num_cols.to_variant().value_i64(), + num_rows.to_variant().value_i64(), + num_batches.to_variant().value_i64(), + ) + + # Register the function + table_func = ParameterizedTableFunction() + table_udtf = udtf(table_func, "param_func") + ctx.register_udtf(table_udtf) + + # Call with different parameters + result = ctx.sql("SELECT * FROM param_func(5, 10, 2)").collect() + assert len(result) == 2 + assert result[0].num_columns == 5 + assert result[0].num_rows == 10 + + +def test_python_table_function_decorator() -> None: + """Test Python TableFunction using decorator syntax.""" + ctx = SessionContext() + + @udtf("decorated_func") + def my_decorated_func(num_cols: Expr, num_rows: Expr, num_batches: Expr) -> Table: + return python_table_function_inner( + num_cols.to_variant().value_i64(), + num_rows.to_variant().value_i64(), + num_batches.to_variant().value_i64(), + ) + + ctx.register_udtf(my_decorated_func) + + result = ctx.sql("SELECT * FROM decorated_func(3, 7, 2)").collect() + assert len(result) == 2 + assert result[0].num_columns == 3 + assert result[0].num_rows == 7 + + +def test_python_table_function_no_args() -> None: + """Test Python TableFunction with no arguments.""" + ctx = SessionContext() + + @udtf("static_func") + def static_table_func() -> Table: + return python_table_function_inner(2, 3, 1) + + ctx.register_udtf(static_table_func) + + result = ctx.sql("SELECT * FROM static_func()").collect() + assert len(result) == 1 + assert list(result[0].column(0).to_pylist()) == [0, 1, 2] + assert list(result[0].column(1).to_pylist()) == [0, 1, 2] + + +def test_python_table_function_single_arg() -> None: + """Test Python TableFunction with a single argument.""" + ctx = SessionContext() + + @udtf("single_arg_func") + def single_arg_func(n: Expr) -> TableProviderExportable: + return python_table_function_inner(2, n.to_variant().value_i64(), 1) + + ctx.register_udtf(single_arg_func) + + result = ctx.sql("SELECT * FROM single_arg_func(15)").collect() + assert len(result) == 1 + assert result[0].num_columns == 2 + assert result[0].num_rows == 15 + + +def test_python_table_function_with_string_args() -> None: + """Test Python TableFunction with string arguments.""" + ctx = SessionContext() + + @udtf("string_arg_func") + def string_arg_func(prefix: Expr) -> TableProviderExportable: + prefix_str = prefix.to_variant().value_string() + # Create a table with the prefix in column names + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=[f"{prefix_str}_a", f"{prefix_str}_b"], + ) + + return Table(ds.dataset([batch])) + + ctx.register_udtf(string_arg_func) + + result = ctx.sql("SELECT * FROM string_arg_func('test')").collect() + assert len(result) == 1 + assert result[0].schema.names == ["test_a", "test_b"] diff --git a/src/catalog.rs b/src/catalog.rs index f8b86ec6a..10ca1dd12 100644 --- a/src/catalog.rs +++ b/src/catalog.rs @@ -25,7 +25,8 @@ use datafusion::catalog::{ }; use datafusion::common::DataFusionError; use datafusion::datasource::TableProvider; -use datafusion_ffi::schema_provider::{FFI_SchemaProvider, ForeignSchemaProvider}; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; +use datafusion_ffi::schema_provider::FFI_SchemaProvider; use pyo3::exceptions::PyKeyError; use pyo3::prelude::*; use pyo3::types::PyCapsule; @@ -34,54 +35,66 @@ use pyo3::IntoPyObjectExt; use crate::dataset::Dataset; use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionError, PyDataFusionResult}; use crate::table::PyTable; -use crate::utils::{validate_pycapsule, wait_for_future}; +use crate::utils::{ + create_logical_extension_capsule, extract_logical_extension_codec, validate_pycapsule, + wait_for_future, +}; #[pyclass(frozen, name = "RawCatalog", module = "datafusion.catalog", subclass)] #[derive(Clone)] pub struct PyCatalog { pub catalog: Arc, + codec: Arc, } #[pyclass(frozen, name = "RawSchema", module = "datafusion.catalog", subclass)] #[derive(Clone)] pub struct PySchema { pub schema: Arc, + codec: Arc, } -impl From> for PyCatalog { - fn from(catalog: Arc) -> Self { - Self { catalog } +impl PyCatalog { + pub(crate) fn new_from_parts( + catalog: Arc, + codec: Arc, + ) -> Self { + Self { catalog, codec } } } -impl From> for PySchema { - fn from(schema: Arc) -> Self { - Self { schema } +impl PySchema { + pub(crate) fn new_from_parts( + schema: Arc, + codec: Arc, + ) -> Self { + Self { schema, codec } } } #[pymethods] impl PyCatalog { #[new] - fn new(catalog: Py) -> Self { - let catalog_provider = - Arc::new(RustWrappedPyCatalogProvider::new(catalog)) as Arc; - catalog_provider.into() + pub fn new(py: Python, catalog: Py, session: Option>) -> PyResult { + let codec = extract_logical_extension_codec(py, session)?; + let catalog = Arc::new(RustWrappedPyCatalogProvider::new(catalog, codec.clone())) + as Arc; + Ok(Self { catalog, codec }) } #[staticmethod] - fn memory_catalog() -> Self { - let catalog_provider = - Arc::new(MemoryCatalogProvider::default()) as Arc; - catalog_provider.into() + pub fn memory_catalog(py: Python, session: Option>) -> PyResult { + let codec = extract_logical_extension_codec(py, session)?; + let catalog = Arc::new(MemoryCatalogProvider::default()) as Arc; + Ok(Self { catalog, codec }) } - fn schema_names(&self) -> HashSet { + pub fn schema_names(&self) -> HashSet { self.catalog.schema_names().into_iter().collect() } #[pyo3(signature = (name="public"))] - fn schema(&self, name: &str) -> PyResult> { + pub fn schema(&self, name: &str) -> PyResult> { let schema = self .catalog .schema(name) @@ -95,29 +108,13 @@ impl PyCatalog { .downcast_ref::() { Some(wrapped_schema) => Ok(wrapped_schema.schema_provider.clone_ref(py)), - None => PySchema::from(schema).into_py_any(py), + None => PySchema::new_from_parts(schema, self.codec.clone()).into_py_any(py), } }) } - fn register_schema(&self, name: &str, schema_provider: Bound<'_, PyAny>) -> PyResult<()> { - let provider = if schema_provider.hasattr("__datafusion_schema_provider__")? { - let capsule = schema_provider - .getattr("__datafusion_schema_provider__")? - .call0()?; - let capsule = capsule.downcast::().map_err(py_datafusion_err)?; - validate_pycapsule(capsule, "datafusion_schema_provider")?; - - let provider = unsafe { capsule.reference::() }; - let provider: ForeignSchemaProvider = provider.into(); - Arc::new(provider) as Arc - } else { - match schema_provider.extract::() { - Ok(py_schema) => py_schema.schema, - Err(_) => Arc::new(RustWrappedPySchemaProvider::new(schema_provider.into())) - as Arc, - } - }; + pub fn register_schema(&self, name: &str, schema_provider: Bound<'_, PyAny>) -> PyResult<()> { + let provider = extract_schema_provider_from_pyobj(schema_provider, self.codec.as_ref())?; let _ = self .catalog @@ -127,7 +124,7 @@ impl PyCatalog { Ok(()) } - fn deregister_schema(&self, name: &str, cascade: bool) -> PyResult<()> { + pub fn deregister_schema(&self, name: &str, cascade: bool) -> PyResult<()> { let _ = self .catalog .deregister_schema(name, cascade) @@ -136,7 +133,7 @@ impl PyCatalog { Ok(()) } - fn __repr__(&self) -> PyResult { + pub fn __repr__(&self) -> PyResult { let mut names: Vec = self.schema_names().into_iter().collect(); names.sort(); Ok(format!("Catalog(schema_names=[{}])", names.join(", "))) @@ -146,16 +143,22 @@ impl PyCatalog { #[pymethods] impl PySchema { #[new] - fn new(schema_provider: Py) -> Self { - let schema_provider = + pub fn new( + py: Python, + schema_provider: Py, + session: Option>, + ) -> PyResult { + let codec = extract_logical_extension_codec(py, session)?; + let schema = Arc::new(RustWrappedPySchemaProvider::new(schema_provider)) as Arc; - schema_provider.into() + Ok(Self { schema, codec }) } #[staticmethod] - fn memory_schema() -> Self { - let schema_provider = Arc::new(MemorySchemaProvider::default()) as Arc; - schema_provider.into() + fn memory_schema(py: Python, session: Option>) -> PyResult { + let codec = extract_logical_extension_codec(py, session)?; + let schema = Arc::new(MemorySchemaProvider::default()) as Arc; + Ok(Self { schema, codec }) } #[getter] @@ -179,8 +182,13 @@ impl PySchema { Ok(format!("Schema(table_names=[{}])", names.join(";"))) } - fn register_table(&self, name: &str, table_provider: &Bound<'_, PyAny>) -> PyResult<()> { - let table = PyTable::new(table_provider)?; + fn register_table(&self, name: &str, table_provider: Bound<'_, PyAny>) -> PyResult<()> { + let py = table_provider.py(); + let codec_capsule = create_logical_extension_capsule(py, self.codec.as_ref())? + .as_any() + .clone(); + + let table = PyTable::new(table_provider, Some(codec_capsule))?; let _ = self .schema @@ -198,6 +206,10 @@ impl PySchema { Ok(()) } + + fn table_exist(&self, name: &str) -> bool { + self.schema.table_exist(name) + } } #[derive(Debug)] @@ -232,7 +244,7 @@ impl RustWrappedPySchemaProvider { return Ok(None); } - let table = PyTable::new(&py_table)?; + let table = PyTable::new(py_table, None)?; Ok(Some(table.table)) }) @@ -326,11 +338,15 @@ impl SchemaProvider for RustWrappedPySchemaProvider { #[derive(Debug)] pub(crate) struct RustWrappedPyCatalogProvider { pub(crate) catalog_provider: Py, + codec: Arc, } impl RustWrappedPyCatalogProvider { - pub fn new(catalog_provider: Py) -> Self { - Self { catalog_provider } + pub fn new(catalog_provider: Py, codec: Arc) -> Self { + Self { + catalog_provider, + codec, + } } fn schema_inner(&self, name: &str) -> PyResult>> { @@ -342,32 +358,7 @@ impl RustWrappedPyCatalogProvider { return Ok(None); } - if py_schema.hasattr("__datafusion_schema_provider__")? { - let capsule = provider - .getattr("__datafusion_schema_provider__")? - .call0()?; - let capsule = capsule.downcast::().map_err(py_datafusion_err)?; - validate_pycapsule(capsule, "datafusion_schema_provider")?; - - let provider = unsafe { capsule.reference::() }; - let provider: ForeignSchemaProvider = provider.into(); - - Ok(Some(Arc::new(provider) as Arc)) - } else { - if let Ok(inner_schema) = py_schema.getattr("schema") { - if let Ok(inner_schema) = inner_schema.extract::() { - return Ok(Some(inner_schema.schema)); - } - } - match py_schema.extract::() { - Ok(inner_schema) => Ok(Some(inner_schema.schema)), - Err(_) => { - let py_schema = RustWrappedPySchemaProvider::new(py_schema.into()); - - Ok(Some(Arc::new(py_schema) as Arc)) - } - } - } + extract_schema_provider_from_pyobj(py_schema, self.codec.as_ref()).map(Some) }) } } @@ -403,15 +394,13 @@ impl CatalogProvider for RustWrappedPyCatalogProvider { name: &str, schema: Arc, ) -> datafusion::common::Result>> { - // JRIGHT HERE - // let py_schema: PySchema = schema.into(); Python::attach(|py| { let py_schema = match schema .as_any() .downcast_ref::() { Some(wrapped_schema) => wrapped_schema.schema_provider.as_any(), - None => &PySchema::from(schema) + None => &PySchema::new_from_parts(schema, self.codec.clone()) .into_py_any(py) .map_err(to_datafusion_err)?, }; @@ -453,6 +442,35 @@ impl CatalogProvider for RustWrappedPyCatalogProvider { } } +fn extract_schema_provider_from_pyobj( + mut schema_provider: Bound, + codec: &FFI_LogicalExtensionCodec, +) -> PyResult> { + if schema_provider.hasattr("__datafusion_schema_provider__")? { + let py = schema_provider.py(); + let codec_capsule = create_logical_extension_capsule(py, codec)?; + schema_provider = schema_provider + .getattr("__datafusion_schema_provider__")? + .call1((codec_capsule,))?; + } + + let provider = if let Ok(capsule) = schema_provider.downcast::() { + validate_pycapsule(capsule, "datafusion_schema_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: Arc = provider.into(); + provider as Arc + } else { + match schema_provider.extract::() { + Ok(py_schema) => py_schema.schema, + Err(_) => Arc::new(RustWrappedPySchemaProvider::new(schema_provider.into())) + as Arc, + } + }; + + Ok(provider) +} + pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; diff --git a/src/context.rs b/src/context.rs index ad4fc36b1..1cd04ac2f 100644 --- a/src/context.rs +++ b/src/context.rs @@ -42,14 +42,18 @@ use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, Unboun use datafusion::execution::options::ReadOptions; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; +use datafusion::execution::TaskContextProvider; use datafusion::prelude::{ AvroReadOptions, CsvReadOptions, DataFrame, NdJsonReadOptions, ParquetReadOptions, }; -use datafusion_ffi::catalog_provider::{FFI_CatalogProvider, ForeignCatalogProvider}; +use datafusion_ffi::catalog_provider::FFI_CatalogProvider; +use datafusion_ffi::execution::FFI_TaskContextProvider; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; +use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec; use object_store::ObjectStore; use pyo3::exceptions::{PyKeyError, PyValueError}; use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType}; +use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; use pyo3::IntoPyObjectExt; use url::Url; use uuid::Uuid; @@ -71,7 +75,10 @@ use crate::udaf::PyAggregateUDF; use crate::udf::PyScalarUDF; use crate::udtf::PyTableFunction; use crate::udwf::PyWindowUDF; -use crate::utils::{get_global_ctx, spawn_future, validate_pycapsule, wait_for_future}; +use crate::utils::{ + create_logical_extension_capsule, extract_logical_extension_codec, get_global_ctx, + get_tokio_runtime, spawn_future, validate_pycapsule, wait_for_future, +}; /// Configuration options for a SessionContext #[pyclass(frozen, name = "SessionConfig", module = "datafusion", subclass)] @@ -296,7 +303,8 @@ impl PySQLOptions { #[pyclass(frozen, name = "SessionContext", module = "datafusion", subclass)] #[derive(Clone)] pub struct PySessionContext { - pub ctx: SessionContext, + pub ctx: Arc, + logical_codec: Arc, } #[pymethods] @@ -323,23 +331,24 @@ impl PySessionContext { .with_runtime_env(runtime) .with_default_features() .build(); - Ok(PySessionContext { - ctx: SessionContext::new_with_state(session_state), - }) + let ctx = Arc::new(SessionContext::new_with_state(session_state)); + let logical_codec = Self::default_logical_codec(&ctx); + Ok(PySessionContext { ctx, logical_codec }) } pub fn enable_url_table(&self) -> PyResult { Ok(PySessionContext { - ctx: self.ctx.clone().enable_url_table(), + ctx: Arc::new(self.ctx.as_ref().clone().enable_url_table()), + logical_codec: Arc::clone(&self.logical_codec), }) } - #[classmethod] + #[staticmethod] #[pyo3(signature = ())] - fn global_ctx(_cls: &Bound<'_, PyType>) -> PyResult { - Ok(Self { - ctx: get_global_ctx().clone(), - }) + pub fn global_ctx() -> PyResult { + let ctx = get_global_ctx().clone(); + let logical_codec = Self::default_logical_codec(&ctx); + Ok(Self { ctx, logical_codec }) } /// Register an object store with the given name @@ -606,7 +615,8 @@ impl PySessionContext { } pub fn register_table(&self, name: &str, table: Bound<'_, PyAny>) -> PyDataFusionResult<()> { - let table = PyTable::new(&table)?; + let session = self.clone().into_bound_py_any(table.py())?; + let table = PyTable::new(table, Some(session))?; self.ctx.register_table(name, table.table)?; Ok(()) @@ -620,25 +630,32 @@ impl PySessionContext { pub fn register_catalog_provider( &self, name: &str, - provider: Bound<'_, PyAny>, + mut provider: Bound<'_, PyAny>, ) -> PyDataFusionResult<()> { - let provider = if provider.hasattr("__datafusion_catalog_provider__")? { - let capsule = provider + if provider.hasattr("__datafusion_catalog_provider__")? { + let py = provider.py(); + let codec_capsule = create_logical_extension_capsule(py, self.logical_codec.as_ref())?; + provider = provider .getattr("__datafusion_catalog_provider__")? - .call0()?; - let capsule = capsule.downcast::().map_err(py_datafusion_err)?; - validate_pycapsule(capsule, "datafusion_catalog_provider")?; + .call1((codec_capsule,))?; + } - let provider = unsafe { capsule.reference::() }; - let provider: ForeignCatalogProvider = provider.into(); - Arc::new(provider) as Arc - } else { - match provider.extract::() { - Ok(py_catalog) => py_catalog.catalog, - Err(_) => Arc::new(RustWrappedPyCatalogProvider::new(provider.into())) - as Arc, - } - }; + let provider = + if let Ok(capsule) = provider.downcast::().map_err(py_datafusion_err) { + validate_pycapsule(capsule, "datafusion_catalog_provider")?; + + let provider = unsafe { capsule.reference::() }; + let provider: Arc = provider.into(); + provider as Arc + } else { + match provider.extract::() { + Ok(py_catalog) => py_catalog.catalog, + Err(_) => Arc::new(RustWrappedPyCatalogProvider::new( + provider.into(), + Arc::clone(&self.logical_codec), + )) as Arc, + } + }; let _ = self.ctx.register_catalog(name, provider); @@ -861,20 +878,21 @@ impl PySessionContext { } #[pyo3(signature = (name="datafusion"))] - pub fn catalog(&self, name: &str) -> PyResult> { + pub fn catalog(&self, py: Python, name: &str) -> PyResult> { let catalog = self.ctx.catalog(name).ok_or(PyKeyError::new_err(format!( "Catalog with name {name} doesn't exist." )))?; - Python::attach(|py| { - match catalog - .as_any() - .downcast_ref::() - { - Some(wrapped_schema) => Ok(wrapped_schema.catalog_provider.clone_ref(py)), - None => PyCatalog::from(catalog).into_py_any(py), - } - }) + match catalog + .as_any() + .downcast_ref::() + { + Some(wrapped_schema) => Ok(wrapped_schema.catalog_provider.clone_ref(py)), + None => Ok( + PyCatalog::new_from_parts(catalog, Arc::clone(&self.logical_codec)) + .into_py_any(py)?, + ), + } } pub fn catalog_names(&self) -> HashSet { @@ -1088,7 +1106,8 @@ impl PySessionContext { } pub fn read_table(&self, table: Bound<'_, PyAny>) -> PyDataFusionResult { - let table = PyTable::new(&table)?; + let session = self.clone().into_bound_py_any(table.py())?; + let table = PyTable::new(table, Some(session))?; let df = self.ctx.read_table(table.table())?; Ok(PyDataFrame::new(df)) } @@ -1122,6 +1141,40 @@ impl PySessionContext { let stream = spawn_future(py, async move { plan.execute(part, Arc::new(ctx)) })?; Ok(PyRecordBatchStream::new(stream)) } + + pub fn __datafusion_task_context_provider__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + let name = cr"datafusion_task_context_provider".into(); + + let ctx_provider = Arc::clone(&self.ctx) as Arc; + let ffi_ctx_provider = FFI_TaskContextProvider::from(&ctx_provider); + + PyCapsule::new(py, ffi_ctx_provider, Some(name)) + } + + pub fn __datafusion_logical_extension_codec__<'py>( + &self, + py: Python<'py>, + ) -> PyResult> { + create_logical_extension_capsule(py, self.logical_codec.as_ref()) + } + + pub fn with_logical_extension_codec<'py>( + &self, + codec: Bound<'py, PyAny>, + ) -> PyDataFusionResult { + let py = codec.py(); + let logical_codec = extract_logical_extension_codec(py, Some(codec))?; + + Ok({ + Self { + ctx: Arc::clone(&self.ctx), + logical_codec, + } + }) + } } impl PySessionContext { @@ -1168,6 +1221,17 @@ impl PySessionContext { .register_table(TableReference::Bare { table: name.into() }, Arc::new(table))?; Ok(()) } + + fn default_logical_codec(ctx: &Arc) -> Arc { + let codec = Arc::new(DefaultLogicalExtensionCodec {}); + let runtime = get_tokio_runtime().0.handle().clone(); + let ctx_provider = Arc::clone(ctx) as Arc; + Arc::new(FFI_LogicalExtensionCodec::new( + codec, + Some(runtime), + &ctx_provider, + )) + } } pub fn parse_file_compression_type( @@ -1181,12 +1245,15 @@ pub fn parse_file_compression_type( impl From for SessionContext { fn from(ctx: PySessionContext) -> SessionContext { - ctx.ctx + ctx.ctx.as_ref().clone() } } impl From for PySessionContext { fn from(ctx: SessionContext) -> PySessionContext { - PySessionContext { ctx } + let ctx = Arc::new(ctx); + let logical_codec = Self::default_logical_codec(&ctx); + + PySessionContext { ctx, logical_codec } } } diff --git a/src/dataframe.rs b/src/dataframe.rs index d920df71e..79b76779b 100644 --- a/src/dataframe.rs +++ b/src/dataframe.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use std::ffi::{CStr, CString}; +use std::str::FromStr; use std::sync::Arc; use arrow::array::{new_null_array, Array, ArrayRef, RecordBatch, RecordBatchReader}; @@ -175,7 +176,7 @@ impl PyParquetWriterOptions { pub fn new( data_pagesize_limit: usize, write_batch_size: usize, - writer_version: String, + writer_version: &str, skip_arrow_metadata: bool, compression: Option, dictionary_enabled: Option, @@ -193,8 +194,11 @@ impl PyParquetWriterOptions { allow_single_file_parallelism: bool, maximum_parallel_row_group_writers: usize, maximum_buffered_record_batches_per_stream: usize, - ) -> Self { - Self { + ) -> PyResult { + let writer_version = + datafusion::common::parquet_config::DFParquetWriterVersion::from_str(writer_version) + .map_err(py_datafusion_err)?; + Ok(Self { options: ParquetOptions { data_pagesize_limit, write_batch_size, @@ -218,7 +222,7 @@ impl PyParquetWriterOptions { maximum_buffered_record_batches_per_stream, ..Default::default() }, - } + }) } } diff --git a/src/expr.rs b/src/expr.rs index fc8023b20..c5776c597 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -145,8 +145,8 @@ impl PyExpr { match &self.expr { Expr::Alias(alias) => Ok(PyAlias::from(alias.clone()).into_bound_py_any(py)?), Expr::Column(col) => Ok(PyColumn::from(col.clone()).into_bound_py_any(py)?), - Expr::ScalarVariable(data_type, variables) => { - Ok(PyScalarVariable::new(data_type, variables).into_bound_py_any(py)?) + Expr::ScalarVariable(field, variables) => { + Ok(PyScalarVariable::new(field, variables).into_bound_py_any(py)?) } Expr::Like(value) => Ok(PyLike::from(value.clone()).into_bound_py_any(py)?), Expr::Literal(value, metadata) => Ok(PyLiteral::new_with_metadata(value.clone(), metadata.clone()).into_bound_py_any(py)?), diff --git a/src/expr/scalar_variable.rs b/src/expr/scalar_variable.rs index f3c128a4c..5bc056e76 100644 --- a/src/expr/scalar_variable.rs +++ b/src/expr/scalar_variable.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion::arrow::datatypes::DataType; +use arrow::datatypes::FieldRef; use pyo3::prelude::*; use crate::common::data_type::PyDataType; @@ -23,14 +23,14 @@ use crate::common::data_type::PyDataType; #[pyclass(frozen, name = "ScalarVariable", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PyScalarVariable { - data_type: DataType, + field: FieldRef, variables: Vec, } impl PyScalarVariable { - pub fn new(data_type: &DataType, variables: &[String]) -> Self { + pub fn new(field: &FieldRef, variables: &[String]) -> Self { Self { - data_type: data_type.to_owned(), + field: field.to_owned(), variables: variables.to_vec(), } } @@ -40,7 +40,7 @@ impl PyScalarVariable { impl PyScalarVariable { /// Get the data type fn data_type(&self) -> PyResult { - Ok(self.data_type.clone().into()) + Ok(self.field.data_type().clone().into()) } fn variables(&self) -> PyResult> { @@ -48,6 +48,6 @@ impl PyScalarVariable { } fn __repr__(&self) -> PyResult { - Ok(format!("{}{:?}", self.data_type, self.variables)) + Ok(format!("{}{:?}", self.field.data_type(), self.variables)) } } diff --git a/src/expr/statement.rs b/src/expr/statement.rs index 40666dd8b..3d8c3c1c7 100644 --- a/src/expr/statement.rs +++ b/src/expr/statement.rs @@ -20,8 +20,8 @@ use std::sync::Arc; use arrow::datatypes::Field; use arrow::pyarrow::PyArrowType; use datafusion::logical_expr::{ - Deallocate, Execute, Prepare, SetVariable, TransactionAccessMode, TransactionConclusion, - TransactionEnd, TransactionIsolationLevel, TransactionStart, + Deallocate, Execute, Prepare, ResetVariable, SetVariable, TransactionAccessMode, + TransactionConclusion, TransactionEnd, TransactionIsolationLevel, TransactionStart, }; use pyo3::prelude::*; use pyo3::IntoPyObjectExt; @@ -259,6 +259,50 @@ impl PyTransactionEnd { } } +#[pyclass(frozen, name = "ResetVariable", module = "datafusion.expr", subclass)] +#[derive(Clone)] +pub struct PyResetVariable { + reset_variable: ResetVariable, +} + +impl From for PyResetVariable { + fn from(reset_variable: ResetVariable) -> PyResetVariable { + PyResetVariable { reset_variable } + } +} + +impl TryFrom for ResetVariable { + type Error = PyErr; + + fn try_from(py: PyResetVariable) -> Result { + Ok(py.reset_variable) + } +} + +impl LogicalNode for PyResetVariable { + fn inputs(&self) -> Vec { + vec![] + } + + fn to_variant<'py>(&self, py: Python<'py>) -> PyResult> { + self.clone().into_bound_py_any(py) + } +} + +#[pymethods] +impl PyResetVariable { + #[new] + pub fn new(variable: String) -> Self { + PyResetVariable { + reset_variable: ResetVariable { variable }, + } + } + + pub fn variable(&self) -> String { + self.reset_variable.variable.clone() + } +} + #[pyclass(frozen, name = "SetVariable", module = "datafusion.expr", subclass)] #[derive(Clone)] pub struct PySetVariable { diff --git a/src/sql/logical.rs b/src/sql/logical.rs index 37f20d287..786118199 100644 --- a/src/sql/logical.rs +++ b/src/sql/logical.rs @@ -55,7 +55,8 @@ use crate::expr::recursive_query::PyRecursiveQuery; use crate::expr::repartition::PyRepartition; use crate::expr::sort::PySort; use crate::expr::statement::{ - PyDeallocate, PyExecute, PyPrepare, PySetVariable, PyTransactionEnd, PyTransactionStart, + PyDeallocate, PyExecute, PyPrepare, PyResetVariable, PySetVariable, PyTransactionEnd, + PyTransactionStart, }; use crate::expr::subquery::PySubquery; use crate::expr::subquery_alias::PySubqueryAlias; @@ -115,6 +116,9 @@ impl PyLogicalPlan { PyTransactionEnd::from(plan.clone()).to_variant(py) } Statement::SetVariable(plan) => PySetVariable::from(plan.clone()).to_variant(py), + Statement::ResetVariable(plan) => { + PyResetVariable::from(plan.clone()).to_variant(py) + } Statement::Prepare(plan) => PyPrepare::from(plan.clone()).to_variant(py), Statement::Execute(plan) => PyExecute::from(plan.clone()).to_variant(py), Statement::Deallocate(plan) => PyDeallocate::from(plan.clone()).to_variant(py), diff --git a/src/table.rs b/src/table.rs index 0eec57f75..f41405467 100644 --- a/src/table.rs +++ b/src/table.rs @@ -28,7 +28,9 @@ use datafusion::logical_expr::{Expr, LogicalPlanBuilder, TableProviderFilterPush use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::DataFrame; use pyo3::prelude::*; +use pyo3::IntoPyObjectExt; +use crate::context::PySessionContext; use crate::dataframe::PyDataFrame; use crate::dataset::Dataset; use crate::utils::table_provider_from_pycapsule; @@ -60,7 +62,8 @@ impl PyTable { /// - FFI Table Providers via PyCapsule /// - PyArrow Dataset objects #[new] - pub fn new(obj: &Bound<'_, PyAny>) -> PyResult { + pub fn new(obj: Bound<'_, PyAny>, session: Option>) -> PyResult { + let py = obj.py(); if let Ok(py_table) = obj.extract::() { Ok(py_table) } else if let Ok(py_table) = obj @@ -77,11 +80,16 @@ impl PyTable { { let provider = py_df.inner_df().as_ref().clone().into_view(); Ok(PyTable::from(provider)) - } else if let Some(provider) = table_provider_from_pycapsule(obj)? { + } else if let Some(provider) = { + let session = match session { + Some(session) => session, + None => PySessionContext::global_ctx()?.into_bound_py_any(obj.py())?, + }; + table_provider_from_pycapsule(obj.clone(), session)? + } { Ok(PyTable::from(provider)) } else { - let py = obj.py(); - let provider = Arc::new(Dataset::new(obj, py)?) as Arc; + let provider = Arc::new(Dataset::new(&obj, py)?) as Arc; Ok(PyTable::from(provider)) } } diff --git a/src/udaf.rs b/src/udaf.rs index 92857f9f7..262366a8a 100644 --- a/src/udaf.rs +++ b/src/udaf.rs @@ -23,9 +23,9 @@ use datafusion::arrow::pyarrow::{PyArrowType, ToPyArrow}; use datafusion::common::ScalarValue; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::{ - create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF, + create_udaf, Accumulator, AccumulatorFactoryFunction, AggregateUDF, AggregateUDFImpl, }; -use datafusion_ffi::udaf::{FFI_AggregateUDF, ForeignAggregateUDF}; +use datafusion_ffi::udaf::FFI_AggregateUDF; use pyo3::prelude::*; use pyo3::types::{PyCapsule, PyTuple}; @@ -158,9 +158,9 @@ fn aggregate_udf_from_capsule(capsule: &Bound<'_, PyCapsule>) -> PyDataFusionRes validate_pycapsule(capsule, "datafusion_aggregate_udf")?; let udaf = unsafe { capsule.reference::() }; - let udaf: ForeignAggregateUDF = udaf.try_into()?; + let udaf: Arc = udaf.into(); - Ok(udaf.into()) + Ok(AggregateUDF::new_from_shared_impl(udaf)) } /// Represents an AggregateUDF diff --git a/src/udf.rs b/src/udf.rs index 5cf87c825..c5d25a4b7 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -22,8 +22,8 @@ use datafusion::arrow::datatypes::DataType; use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; use datafusion::error::DataFusionError; use datafusion::logical_expr::function::ScalarFunctionImplementation; -use datafusion::logical_expr::{create_udf, ColumnarValue, ScalarUDF}; -use datafusion_ffi::udf::{FFI_ScalarUDF, ForeignScalarUDF}; +use datafusion::logical_expr::{create_udf, ColumnarValue, ScalarUDF, ScalarUDFImpl}; +use datafusion_ffi::udf::FFI_ScalarUDF; use pyo3::prelude::*; use pyo3::types::{PyCapsule, PyTuple}; @@ -112,10 +112,10 @@ impl PyScalarUDF { validate_pycapsule(capsule, "datafusion_scalar_udf")?; let udf = unsafe { capsule.reference::() }; - let udf: ForeignScalarUDF = udf.try_into()?; + let udf: Arc = udf.into(); Ok(Self { - function: udf.into(), + function: ScalarUDF::new_from_shared_impl(udf), }) } else { Err(crate::errors::PyDataFusionError::Common( diff --git a/src/udtf.rs b/src/udtf.rs index 7226dbe92..eee00462c 100644 --- a/src/udtf.rs +++ b/src/udtf.rs @@ -20,10 +20,13 @@ use std::sync::Arc; use datafusion::catalog::{TableFunctionImpl, TableProvider}; use datafusion::error::Result as DataFusionResult; use datafusion::logical_expr::Expr; -use datafusion_ffi::udtf::{FFI_TableFunction, ForeignTableFunction}; +use datafusion_ffi::udtf::FFI_TableFunction; +use pyo3::exceptions::{PyImportError, PyTypeError}; use pyo3::prelude::*; -use pyo3::types::{PyCapsule, PyTuple}; +use pyo3::types::{PyCapsule, PyTuple, PyType}; +use pyo3::IntoPyObjectExt; +use crate::context::PySessionContext; use crate::errors::{py_datafusion_err, to_datafusion_err}; use crate::expr::PyExpr; use crate::table::PyTable; @@ -47,17 +50,34 @@ pub(crate) enum PyTableFunctionInner { #[pymethods] impl PyTableFunction { #[new] - #[pyo3(signature=(name, func))] - pub fn new(name: &str, func: Bound<'_, PyAny>) -> PyResult { + #[pyo3(signature=(name, func, session))] + pub fn new( + name: &str, + func: Bound<'_, PyAny>, + session: Option>, + ) -> PyResult { let inner = if func.hasattr("__datafusion_table_function__")? { - let capsule = func.getattr("__datafusion_table_function__")?.call0()?; + let py = func.py(); + let session = match session { + Some(session) => session, + None => PySessionContext::global_ctx()?.into_bound_py_any(py)?, + }; + let capsule = func + .getattr("__datafusion_table_function__")? + .call1((session,)).map_err(|err| { + if err.get_type(py).is(PyType::new::(py)) { + PyImportError::new_err("Incompatible libraries. DataFusion 52.0.0 introduced an incompatible signature change for table functions. Either downgrade DataFusion or upgrade your function library.") + } else { + err + } + })?; let capsule = capsule.downcast::().map_err(py_datafusion_err)?; validate_pycapsule(capsule, "datafusion_table_function")?; let ffi_func = unsafe { capsule.reference::() }; - let foreign_func: ForeignTableFunction = ffi_func.to_owned().into(); + let foreign_func: Arc = ffi_func.to_owned().into(); - PyTableFunctionInner::FFIFunction(Arc::new(foreign_func)) + PyTableFunctionInner::FFIFunction(foreign_func) } else { let py_obj = Arc::new(func.unbind()); PyTableFunctionInner::PythonFunction(py_obj) @@ -96,9 +116,9 @@ fn call_python_table_function( Python::attach(|py| { let py_args = PyTuple::new(py, args)?; let provider_obj = func.call1(py, py_args)?; - let provider = provider_obj.bind(py); + let provider = provider_obj.bind(py).clone(); - Ok::, PyErr>(PyTable::new(provider)?.table) + Ok::, PyErr>(PyTable::new(provider, None)?.table) }) .map_err(to_datafusion_err) } diff --git a/src/udwf.rs b/src/udwf.rs index d347ec0f1..86310609c 100644 --- a/src/udwf.rs +++ b/src/udwf.rs @@ -30,7 +30,7 @@ use datafusion::logical_expr::{ PartitionEvaluator, PartitionEvaluatorFactory, Signature, Volatility, WindowUDF, WindowUDFImpl, }; use datafusion::scalar::ScalarValue; -use datafusion_ffi::udwf::{FFI_WindowUDF, ForeignWindowUDF}; +use datafusion_ffi::udwf::FFI_WindowUDF; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{PyCapsule, PyList, PyTuple}; @@ -249,22 +249,21 @@ impl PyWindowUDF { #[staticmethod] pub fn from_pycapsule(func: Bound<'_, PyAny>) -> PyDataFusionResult { - if func.hasattr("__datafusion_window_udf__")? { - let capsule = func.getattr("__datafusion_window_udf__")?.call0()?; - let capsule = capsule.downcast::().map_err(py_datafusion_err)?; - validate_pycapsule(capsule, "datafusion_window_udf")?; + let capsule = if func.hasattr("__datafusion_window_udf__")? { + func.getattr("__datafusion_window_udf__")?.call0()? + } else { + func + }; - let udwf = unsafe { capsule.reference::() }; - let udwf: ForeignWindowUDF = udwf.try_into()?; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + validate_pycapsule(capsule, "datafusion_window_udf")?; - Ok(Self { - function: udwf.into(), - }) - } else { - Err(crate::errors::PyDataFusionError::Common( - "__datafusion_window_udf__ does not exist on WindowUDF object.".to_string(), - )) - } + let udwf = unsafe { capsule.reference::() }; + let udwf: Arc = udwf.into(); + + Ok(Self { + function: WindowUDF::new_from_shared_impl(udwf), + }) } fn __repr__(&self) -> PyResult { diff --git a/src/utils.rs b/src/utils.rs index 6038c77b1..eede34907 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -23,15 +23,18 @@ use datafusion::common::ScalarValue; use datafusion::datasource::TableProvider; use datafusion::execution::context::SessionContext; use datafusion::logical_expr::Volatility; -use datafusion_ffi::table_provider::{FFI_TableProvider, ForeignTableProvider}; -use pyo3::exceptions::PyValueError; +use datafusion_ffi::proto::logical_extension_codec::FFI_LogicalExtensionCodec; +use datafusion_ffi::table_provider::FFI_TableProvider; +use pyo3::exceptions::{PyImportError, PyTypeError, PyValueError}; use pyo3::prelude::*; -use pyo3::types::PyCapsule; +use pyo3::types::{PyCapsule, PyType}; +use pyo3::IntoPyObjectExt; use tokio::runtime::Runtime; use tokio::task::JoinHandle; use tokio::time::sleep; use crate::common::data_type::PyScalarValue; +use crate::context::PySessionContext; use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionError, PyDataFusionResult}; use crate::TokioRuntime; @@ -60,9 +63,9 @@ pub(crate) fn is_ipython_env(py: Python) -> &'static bool { /// Utility to get the Global Datafussion CTX #[inline] -pub(crate) fn get_global_ctx() -> &'static SessionContext { - static CTX: OnceLock = OnceLock::new(); - CTX.get_or_init(SessionContext::new) +pub(crate) fn get_global_ctx() -> &'static Arc { + static CTX: OnceLock> = OnceLock::new(); + CTX.get_or_init(|| Arc::new(SessionContext::new())) } /// Utility to collect rust futures with GIL released and respond to @@ -167,18 +170,30 @@ pub(crate) fn validate_pycapsule(capsule: &Bound, name: &str) -> PyRe Ok(()) } -pub(crate) fn table_provider_from_pycapsule( - obj: &Bound, +pub(crate) fn table_provider_from_pycapsule<'py>( + mut obj: Bound<'py, PyAny>, + session: Bound<'py, PyAny>, ) -> PyResult>> { if obj.hasattr("__datafusion_table_provider__")? { - let capsule = obj.getattr("__datafusion_table_provider__")?.call0()?; - let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + obj = obj + .getattr("__datafusion_table_provider__")? + .call1((session,)).map_err(|err| { + let py = obj.py(); + if err.get_type(py).is(PyType::new::(py)) { + PyImportError::new_err("Incompatible libraries. DataFusion 52.0.0 introduced an incompatible signature change for table providers. Either downgrade DataFusion or upgrade your function library.") + } else { + err + } + })?; + } + + if let Ok(capsule) = obj.downcast::().map_err(py_datafusion_err) { validate_pycapsule(capsule, "datafusion_table_provider")?; let provider = unsafe { capsule.reference::() }; - let provider: ForeignTableProvider = provider.into(); + let provider: Arc = provider.into(); - Ok(Some(Arc::new(provider))) + Ok(Some(provider)) } else { Ok(None) } @@ -199,3 +214,37 @@ pub(crate) fn py_obj_to_scalar_value(py: Python, obj: Py) -> PyResult>, +) -> PyResult> { + let obj = match obj { + Some(obj) => obj, + None => PySessionContext::global_ctx()?.into_bound_py_any(py)?, + }; + let capsule = if obj.hasattr("__datafusion_logical_extension_codec__")? { + let capsule = obj + .getattr("__datafusion_logical_extension_codec__")? + .call0()?; + capsule + } else { + obj + }; + let capsule = capsule.downcast::().map_err(py_datafusion_err)?; + + validate_pycapsule(capsule, "datafusion_logical_extension_codec")?; + + let codec = unsafe { capsule.reference::() }; + Ok(Arc::new(codec.clone())) +} + +pub(crate) fn create_logical_extension_capsule<'py>( + py: Python<'py>, + codec: &FFI_LogicalExtensionCodec, +) -> PyResult> { + let name = cr"datafusion_logical_extension_codec".into(); + let codec = codec.clone(); + + PyCapsule::new(py, codec, Some(name)) +} From ada3dcdb8da0fe14cf4e61fe874485e937468f29 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Feb 2026 07:30:29 -0500 Subject: [PATCH 03/87] build(deps): bump actions/checkout from 5 to 6 (#1310) Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 18 +++++++++--------- .github/workflows/dev.yml | 2 +- .github/workflows/test.yaml | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 2dc8b96fe..8eb64b420 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,7 +27,7 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Install Python uses: actions/setup-python@v6 with: @@ -55,7 +55,7 @@ jobs: generate-license: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: astral-sh/setup-uv@v7 with: enable-cache: true @@ -77,7 +77,7 @@ jobs: python-version: ["3.10"] os: [macos-latest, windows-latest] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: @@ -133,7 +133,7 @@ jobs: matrix: python-version: ["3.10"] steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: @@ -177,7 +177,7 @@ jobs: name: Manylinux x86_64 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - run: rm LICENSE.txt - name: Download LICENSE.txt uses: actions/download-artifact@v5 @@ -206,7 +206,7 @@ jobs: name: Manylinux arm64 runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - run: rm LICENSE.txt - name: Download LICENSE.txt uses: actions/download-artifact@v5 @@ -236,7 +236,7 @@ jobs: name: Source distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - run: rm LICENSE.txt - name: Download LICENSE.txt uses: actions/download-artifact@v5 @@ -299,11 +299,11 @@ jobs: fi - name: Checkout docs sources - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Checkout docs target branch if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref_type == 'tag') - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 0 ref: ${{ steps.target-branch.outputs.value }} diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index ac45e9fdf..434fd9b57 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -25,7 +25,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Setup Python uses: actions/setup-python@v6 with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 99457b872..687ac9cba 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -42,7 +42,7 @@ jobs: - "stable" steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Verify example datafusion version run: | @@ -126,7 +126,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@v6 - name: Setup Rust Toolchain uses: dtolnay/rust-toolchain@stable From f6cc7a2a9f7e08aafd1e6c700f130b4cff733533 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Feb 2026 07:31:00 -0500 Subject: [PATCH 04/87] build(deps): bump actions/download-artifact from 5 to 7 (#1321) Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 5 to 7. - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/v5...v7) --- updated-dependencies: - dependency-name: actions/download-artifact dependency-version: '7' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8eb64b420..cdb6f4443 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -87,7 +87,7 @@ jobs: - run: rm LICENSE.txt - name: Download LICENSE.txt - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: name: python-wheel-license path: . @@ -143,7 +143,7 @@ jobs: - run: rm LICENSE.txt - name: Download LICENSE.txt - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: name: python-wheel-license path: . @@ -180,7 +180,7 @@ jobs: - uses: actions/checkout@v6 - run: rm LICENSE.txt - name: Download LICENSE.txt - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: name: python-wheel-license path: . @@ -209,7 +209,7 @@ jobs: - uses: actions/checkout@v6 - run: rm LICENSE.txt - name: Download LICENSE.txt - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: name: python-wheel-license path: . @@ -239,7 +239,7 @@ jobs: - uses: actions/checkout@v6 - run: rm LICENSE.txt - name: Download LICENSE.txt - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: name: python-wheel-license path: . @@ -321,7 +321,7 @@ jobs: # Download the Linux wheel built in the previous job - name: Download pre-built Linux wheel - uses: actions/download-artifact@v5 + uses: actions/download-artifact@v7 with: name: dist-manylinux-x86_64 path: wheels/ @@ -375,7 +375,7 @@ jobs: # needs: [build-manylinux, build-python-mac-win] # runs-on: ubuntu-latest # steps: - # - uses: actions/download-artifact@v5 + # - uses: actions/download-artifact@v7 # - name: Publish to PyPI # uses: pypa/gh-action-pypi-publish@master # with: From 2465e19c390861163f024164df0c6987ccb0fec5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Feb 2026 07:31:25 -0500 Subject: [PATCH 05/87] build(deps): bump actions/upload-artifact from 4 to 6 (#1322) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4 to 6. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v4...v6) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cdb6f4443..911c536a8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -62,7 +62,7 @@ jobs: - name: Generate license file run: uv run --no-project python ./dev/create_license.py - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v6 with: name: python-wheel-license path: LICENSE.txt @@ -119,7 +119,7 @@ jobs: run: find target/wheels/ - name: Archive wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: dist-${{ matrix.os }} path: target/wheels/* @@ -167,7 +167,7 @@ jobs: run: find target/wheels/ - name: Archive wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: dist-macos-aarch64 path: target/wheels/* @@ -196,7 +196,7 @@ jobs: rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 args: --release --manylinux 2014 --features protoc,substrait - name: Archive wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: dist-manylinux-x86_64 path: target/wheels/* @@ -226,7 +226,7 @@ jobs: rustup-components: rust-std rustfmt # Keep them in one line due to https://github.com/PyO3/maturin-action/issues/153 args: --release --features protoc,substrait - name: Archive wheels - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v6 with: name: dist-manylinux-aarch64 path: target/wheels/* @@ -271,7 +271,7 @@ jobs: - build-sdist steps: - name: Merge Build Artifacts - uses: actions/upload-artifact/merge@v4 + uses: actions/upload-artifact/merge@v6 with: name: dist pattern: dist-* From 32272765a4a6215befd75c6cd1af22d09e170808 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 4 Feb 2026 07:31:49 -0500 Subject: [PATCH 06/87] build(deps): bump actions/cache from 4 to 5 (#1323) Bumps [actions/cache](https://github.com/actions/cache) from 4 to 5. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/cache dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 687ac9cba..df4d8fcda 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -74,7 +74,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Cache Cargo - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ~/.cargo key: cargo-cache-${{ steps.rust-toolchain.outputs.cachekey }}-${{ hashFiles('Cargo.lock') }} @@ -105,7 +105,7 @@ jobs: - name: Cache the generated dataset id: cache-tpch-dataset - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: benchmarks/tpch/data key: tpch-data-2.18.0 From 015dd76f9fdc8fe74cce1c87fa348b20698903fd Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 4 Feb 2026 08:54:45 -0500 Subject: [PATCH 07/87] Pass Field information back and forth when using scalar UDFs (#1299) * Pass Field information back and forth when using scalar UDFs * Add ArrowArrayExportable class and use it to create pyarrow arrays for python UDFs * Minor user documentation update * Update naming from type to field where appropriate * Add unit test to check field inputs * Update docstring * Add text to user documentation on passing field information for scalar UDFs * Minor change requested in code review * Make type hints match outer --- .../common-operations/udf-and-udfa.rst | 11 ++ pyproject.toml | 1 + python/datafusion/user_defined.py | 79 ++++++---- python/tests/test_udf.py | 86 ++++++++++- src/array.rs | 82 ++++++++++ src/lib.rs | 1 + src/udf.rs | 146 ++++++++++++++---- uv.lock | 55 +------ 8 files changed, 348 insertions(+), 113 deletions(-) create mode 100644 src/array.rs diff --git a/docs/source/user-guide/common-operations/udf-and-udfa.rst b/docs/source/user-guide/common-operations/udf-and-udfa.rst index 0830fa81c..d554e1e25 100644 --- a/docs/source/user-guide/common-operations/udf-and-udfa.rst +++ b/docs/source/user-guide/common-operations/udf-and-udfa.rst @@ -90,6 +90,17 @@ converting to Python objects to do the evaluation. df.select(col("a"), is_null_arr(col("a")).alias("is_null")).show() +In this example we passed the PyArrow ``DataType`` when we defined the function +by calling ``udf()``. If you need additional control, such as specifying +metadata or nullability of the input or output, you can instead specify a +PyArrow ``Field``. + +If you need to write a custom function but do not want to incur the performance +cost of converting to Python objects and back, a more advanced approach is to +write Rust based UDFs and to expose them to Python. There is an example in the +`DataFusion blog `_ +describing how to do this. + Aggregate Functions ------------------- diff --git a/pyproject.toml b/pyproject.toml index 9ad7dab8d..497943a34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -141,6 +141,7 @@ dev = [ "maturin>=1.8.1", "numpy>1.25.0;python_version<'3.14'", "numpy>=2.3.2;python_version>='3.14'", + "pyarrow>=19.0.0", "pre-commit>=4.3.0", "pyyaml>=6.0.3", "pytest>=7.4.4", diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py index e6543e767..5dd626568 100644 --- a/python/datafusion/user_defined.py +++ b/python/datafusion/user_defined.py @@ -34,7 +34,7 @@ from _typeshed import CapsuleType as _PyCapsule _R = TypeVar("_R", bound=pa.DataType) - from collections.abc import Callable + from collections.abc import Callable, Sequence class Volatility(Enum): @@ -81,6 +81,27 @@ def __str__(self) -> str: return self.name.lower() +def data_type_or_field_to_field(value: pa.DataType | pa.Field, name: str) -> pa.Field: + """Helper function to return a Field from either a Field or DataType.""" + if isinstance(value, pa.Field): + return value + return pa.field(name, type=value) + + +def data_types_or_fields_to_field_list( + inputs: Sequence[pa.Field | pa.DataType] | pa.Field | pa.DataType, +) -> list[pa.Field]: + """Helper function to return a list of Fields.""" + if isinstance(inputs, pa.DataType): + return [pa.field("value", type=inputs)] + if isinstance(inputs, pa.Field): + return [inputs] + + return [ + data_type_or_field_to_field(v, f"value_{idx}") for (idx, v) in enumerate(inputs) + ] + + class ScalarUDFExportable(Protocol): """Type hint for object that has __datafusion_scalar_udf__ PyCapsule.""" @@ -103,8 +124,8 @@ def __init__( self, name: str, func: Callable[..., _R], - input_types: pa.DataType | list[pa.DataType], - return_type: _R, + input_fields: list[pa.Field], + return_field: _R, volatility: Volatility | str, ) -> None: """Instantiate a scalar user-defined function (UDF). @@ -114,10 +135,10 @@ def __init__( if hasattr(func, "__datafusion_scalar_udf__"): self._udf = df_internal.ScalarUDF.from_pycapsule(func) return - if isinstance(input_types, pa.DataType): - input_types = [input_types] + if isinstance(input_fields, pa.DataType): + input_fields = [input_fields] self._udf = df_internal.ScalarUDF( - name, func, input_types, return_type, str(volatility) + name, func, input_fields, return_field, str(volatility) ) def __repr__(self) -> str: @@ -136,8 +157,8 @@ def __call__(self, *args: Expr) -> Expr: @overload @staticmethod def udf( - input_types: list[pa.DataType], - return_type: _R, + input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, + return_field: pa.DataType | pa.Field, volatility: Volatility | str, name: str | None = None, ) -> Callable[..., ScalarUDF]: ... @@ -146,8 +167,8 @@ def udf( @staticmethod def udf( func: Callable[..., _R], - input_types: list[pa.DataType], - return_type: _R, + input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, + return_field: pa.DataType | pa.Field, volatility: Volatility | str, name: str | None = None, ) -> ScalarUDF: ... @@ -163,20 +184,24 @@ def udf(*args: Any, **kwargs: Any): # noqa: D417 This class can be used both as either a function or a decorator. Usage: - - As a function: ``udf(func, input_types, return_type, volatility, name)``. - - As a decorator: ``@udf(input_types, return_type, volatility, name)``. + - As a function: ``udf(func, input_fields, return_field, volatility, name)``. + - As a decorator: ``@udf(input_fields, return_field, volatility, name)``. When used a decorator, do **not** pass ``func`` explicitly. + In lieu of passing a PyArrow Field, you can pass a DataType for simplicity. + When you do so, it will be assumed that the nullability of the inputs and + output are True and that they have no metadata. + Args: func (Callable, optional): Only needed when calling as a function. Skip this argument when using `udf` as a decorator. If you have a Rust backed ScalarUDF within a PyCapsule, you can pass this parameter and ignore the rest. They will be determined directly from the underlying function. See the online documentation for more information. - input_types (list[pa.DataType]): The data types of the arguments - to ``func``. This list must be of the same length as the number of - arguments. - return_type (_R): The data type of the return value from the function. + input_fields (list[pa.Field | pa.DataType]): The data types or Fields + of the arguments to ``func``. This list must be of the same length + as the number of arguments. + return_field (_R): The field of the return value from the function. volatility (Volatility | str): See `Volatility` for allowed values. name (Optional[str]): A descriptive name for the function. @@ -196,12 +221,12 @@ def double_func(x): @udf([pa.int32()], pa.int32(), "volatile", "double_it") def double_udf(x): return x * 2 - """ + """ # noqa: W505 E501 def _function( func: Callable[..., _R], - input_types: list[pa.DataType], - return_type: _R, + input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, + return_field: pa.DataType | pa.Field, volatility: Volatility | str, name: str | None = None, ) -> ScalarUDF: @@ -213,23 +238,25 @@ def _function( name = func.__qualname__.lower() else: name = func.__class__.__name__.lower() + input_fields = data_types_or_fields_to_field_list(input_fields) + return_field = data_type_or_field_to_field(return_field, "value") return ScalarUDF( name=name, func=func, - input_types=input_types, - return_type=return_type, + input_fields=input_fields, + return_field=return_field, volatility=volatility, ) def _decorator( - input_types: list[pa.DataType], - return_type: _R, + input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field, + return_field: _R, volatility: Volatility | str, name: str | None = None, ) -> Callable: def decorator(func: Callable) -> Callable: udf_caller = ScalarUDF.udf( - func, input_types, return_type, volatility, name + func, input_fields, return_field, volatility, name ) @functools.wraps(func) @@ -260,8 +287,8 @@ def from_pycapsule(func: ScalarUDFExportable) -> ScalarUDF: return ScalarUDF( name=name, func=func, - input_types=None, - return_type=None, + input_fields=None, + return_field=None, volatility=None, ) diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index a6c047552..c90668efe 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -17,7 +17,8 @@ import pyarrow as pa import pytest -from datafusion import column, udf +from datafusion import SessionContext, column, udf +from datafusion import functions as f @pytest.fixture @@ -124,3 +125,86 @@ def udf_with_param(values: pa.Array) -> pa.Array: result = df2.collect()[0].column(0) assert result == pa.array([False, True, True]) + + +def test_udf_with_metadata(ctx) -> None: + from uuid import UUID + + @udf([pa.string()], pa.uuid(), "stable") + def uuid_from_string(uuid_string): + return pa.array((UUID(s).bytes for s in uuid_string.to_pylist()), pa.uuid()) + + @udf([pa.uuid()], pa.int64(), "stable") + def uuid_version(uuid): + return pa.array(s.version for s in uuid.to_pylist()) + + batch = pa.record_batch({"idx": pa.array(range(5))}) + results = ( + ctx.create_dataframe([[batch]]) + .with_column("uuid_string", f.uuid()) + .with_column("uuid", uuid_from_string(column("uuid_string"))) + .select(uuid_version(column("uuid").alias("uuid_version"))) + .collect() + ) + + assert results[0][0].to_pylist() == [4, 4, 4, 4, 4] + + +def test_udf_with_nullability(ctx: SessionContext) -> None: + import pyarrow.compute as pc + + field_nullable_i64 = pa.field("with_nulls", type=pa.int64(), nullable=True) + field_non_nullable_i64 = pa.field("no_nulls", type=pa.int64(), nullable=False) + + @udf([field_nullable_i64], field_nullable_i64, "stable") + def nullable_abs(input_col): + return pc.abs(input_col) + + @udf([field_non_nullable_i64], field_non_nullable_i64, "stable") + def non_nullable_abs(input_col): + return pc.abs(input_col) + + batch = pa.record_batch( + { + "with_nulls": pa.array([-2, None, 0, 1, 2]), + "no_nulls": pa.array([-2, -1, 0, 1, 2]), + }, + schema=pa.schema( + [ + field_nullable_i64, + field_non_nullable_i64, + ] + ), + ) + ctx.register_record_batches("t", [[batch]]) + df = ctx.table("t") + + # Input matches expected, nullable + df_result = df.select(nullable_abs(column("with_nulls"))) + returned_field = df_result.schema().field(0) + assert returned_field.nullable + results = df_result.collect() + assert results[0][0].to_pylist() == [2, None, 0, 1, 2] + + # Input coercible to expected, nullable + df_result = df.select(nullable_abs(column("no_nulls"))) + returned_field = df_result.schema().field(0) + assert returned_field.nullable + results = df_result.collect() + assert results[0][0].to_pylist() == [2, 1, 0, 1, 2] + + # Input matches expected, no nulls + df_result = df.select(non_nullable_abs(column("no_nulls"))) + returned_field = df_result.schema().field(0) + assert not returned_field.nullable + results = df_result.collect() + assert results[0][0].to_pylist() == [2, 1, 0, 1, 2] + + # Invalid - requires non-nullable input but that is not possible + df_result = df.select(non_nullable_abs(column("with_nulls"))) + returned_field = df_result.schema().field(0) + assert not returned_field.nullable + + with pytest.raises(Exception) as e_info: + _results = df_result.collect() + assert "InvalidArgumentError" in str(e_info) diff --git a/src/array.rs b/src/array.rs new file mode 100644 index 000000000..4dbd708cd --- /dev/null +++ b/src/array.rs @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef}; +use arrow::datatypes::{Field, FieldRef}; +use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema}; +use arrow::pyarrow::ToPyArrow; +use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods}; +use pyo3::types::PyCapsule; +use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python}; + +use crate::errors::PyDataFusionResult; +use crate::utils::validate_pycapsule; + +/// A Python object which implements the Arrow PyCapsule for importing +/// into other libraries. +#[pyclass(name = "ArrowArrayExportable", module = "datafusion", frozen)] +#[derive(Clone)] +pub struct PyArrowArrayExportable { + array: ArrayRef, + field: FieldRef, +} + +#[pymethods] +impl PyArrowArrayExportable { + #[pyo3(signature = (requested_schema=None))] + fn __arrow_c_array__<'py>( + &'py self, + py: Python<'py>, + requested_schema: Option>, + ) -> PyDataFusionResult<(Bound<'py, PyCapsule>, Bound<'py, PyCapsule>)> { + let field = if let Some(schema_capsule) = requested_schema { + validate_pycapsule(&schema_capsule, "arrow_schema")?; + + let schema_ptr = unsafe { schema_capsule.reference::() }; + let desired_field = Field::try_from(schema_ptr)?; + + Arc::new(desired_field) + } else { + Arc::clone(&self.field) + }; + + let ffi_schema = FFI_ArrowSchema::try_from(&field)?; + let schema_capsule = PyCapsule::new(py, ffi_schema, Some(cr"arrow_schema".into()))?; + + let ffi_array = FFI_ArrowArray::new(&self.array.to_data()); + let array_capsule = PyCapsule::new(py, ffi_array, Some(cr"arrow_array".into()))?; + + Ok((schema_capsule, array_capsule)) + } +} + +impl ToPyArrow for PyArrowArrayExportable { + fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { + let module = py.import("pyarrow")?; + let method = module.getattr("array")?; + let array = method.call((self.clone(),), None)?; + Ok(array) + } +} + +impl PyArrowArrayExportable { + pub fn new(array: ArrayRef, field: FieldRef) -> Self { + Self { array, field } + } +} diff --git a/src/lib.rs b/src/lib.rs index 9483a5252..eda50fe10 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,6 +52,7 @@ pub mod store; pub mod table; pub mod unparser; +mod array; #[cfg(feature = "substrait")] pub mod substrait; #[allow(clippy::borrow_deref_ref)] diff --git a/src/udf.rs b/src/udf.rs index c5d25a4b7..3eec936c2 100644 --- a/src/udf.rs +++ b/src/udf.rs @@ -15,67 +15,143 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; +use std::hash::{Hash, Hasher}; use std::sync::Arc; -use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef}; +use arrow::datatypes::{Field, FieldRef}; +use arrow::pyarrow::ToPyArrow; +use datafusion::arrow::array::{make_array, ArrayData}; use datafusion::arrow::datatypes::DataType; -use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow}; +use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType}; +use datafusion::common::internal_err; use datafusion::error::DataFusionError; -use datafusion::logical_expr::function::ScalarFunctionImplementation; -use datafusion::logical_expr::{create_udf, ColumnarValue, ScalarUDF, ScalarUDFImpl}; +use datafusion::logical_expr::{ + ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, + Volatility, +}; use datafusion_ffi::udf::FFI_ScalarUDF; use pyo3::prelude::*; use pyo3::types::{PyCapsule, PyTuple}; +use crate::array::PyArrowArrayExportable; use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult}; use crate::expr::PyExpr; use crate::utils::{parse_volatility, validate_pycapsule}; -/// Create a Rust callable function from a python function that expects pyarrow arrays -fn pyarrow_function_to_rust( +/// This struct holds the Python written function that is a +/// ScalarUDF. +#[derive(Debug)] +struct PythonFunctionScalarUDF { + name: String, func: Py, -) -> impl Fn(&[ArrayRef]) -> Result { - move |args: &[ArrayRef]| -> Result { + signature: Signature, + return_field: FieldRef, +} + +impl PythonFunctionScalarUDF { + fn new( + name: String, + func: Py, + input_fields: Vec, + return_field: Field, + volatility: Volatility, + ) -> Self { + let input_types = input_fields.iter().map(|f| f.data_type().clone()).collect(); + let signature = Signature::exact(input_types, volatility); + Self { + name, + func, + signature, + return_field: Arc::new(return_field), + } + } +} + +impl Eq for PythonFunctionScalarUDF {} +impl PartialEq for PythonFunctionScalarUDF { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + && self.signature == other.signature + && self.return_field == other.return_field + && Python::attach(|py| self.func.bind(py).eq(other.func.bind(py)).unwrap_or(false)) + } +} + +impl Hash for PythonFunctionScalarUDF { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.signature.hash(state); + self.return_field.hash(state); + + Python::attach(|py| { + let py_hash = self.func.bind(py).hash().unwrap_or(0); // Handle unhashable objects + + state.write_isize(py_hash); + }); + } +} + +impl ScalarUDFImpl for PythonFunctionScalarUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + &self.name + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> datafusion::common::Result { + internal_err!( + "return_field should not be called when return_field_from_args is implemented." + ) + } + + fn return_field_from_args( + &self, + _args: ReturnFieldArgs, + ) -> datafusion::common::Result { + Ok(Arc::clone(&self.return_field)) + } + + fn invoke_with_args( + &self, + args: ScalarFunctionArgs, + ) -> datafusion::common::Result { + let num_rows = args.number_rows; Python::attach(|py| { // 1. cast args to Pyarrow arrays let py_args = args - .iter() - .map(|arg| { - arg.into_data() + .args + .into_iter() + .zip(args.arg_fields) + .map(|(arg, field)| { + let array = arg.to_array(num_rows)?; + PyArrowArrayExportable::new(array, field) .to_pyarrow(py) - .map_err(|e| DataFusionError::Execution(format!("{e:?}"))) + .map_err(to_datafusion_err) }) .collect::, _>>()?; let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?; // 2. call function - let value = func + let value = self + .func .call(py, py_args, None) .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; // 3. cast to arrow::array::Array let array_data = ArrayData::from_pyarrow_bound(value.bind(py)) .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?; - Ok(make_array(array_data)) + Ok(ColumnarValue::Array(make_array(array_data))) }) } } -/// Create a DataFusion's UDF implementation from a python function -/// that expects pyarrow arrays. This is more efficient as it performs -/// a zero-copy of the contents. -fn to_scalar_function_impl(func: Py) -> ScalarFunctionImplementation { - // Make the python function callable from rust - let pyarrow_func = pyarrow_function_to_rust(func); - - // Convert input/output from datafusion ColumnarValue to arrow arrays - Arc::new(move |args: &[ColumnarValue]| { - let array_refs = ColumnarValue::values_to_arrays(args)?; - let array_result = pyarrow_func(&array_refs)?; - Ok(array_result.into()) - }) -} - /// Represents a PyScalarUDF #[pyclass(frozen, name = "ScalarUDF", module = "datafusion", subclass)] #[derive(Debug, Clone)] @@ -88,19 +164,21 @@ impl PyScalarUDF { #[new] #[pyo3(signature=(name, func, input_types, return_type, volatility))] fn new( - name: &str, + name: String, func: Py, - input_types: PyArrowType>, - return_type: PyArrowType, + input_types: PyArrowType>, + return_type: PyArrowType, volatility: &str, ) -> PyResult { - let function = create_udf( + let py_function = PythonFunctionScalarUDF::new( name, + func, input_types.0, return_type.0, parse_volatility(volatility)?, - to_scalar_function_impl(func), ); + let function = ScalarUDF::new_from_impl(py_function); + Ok(Self { function }) } diff --git a/uv.lock b/uv.lock index 92a10c7cd..743611005 100644 --- a/uv.lock +++ b/uv.lock @@ -253,8 +253,7 @@ wheels = [ name = "datafusion" source = { editable = "." } dependencies = [ - { name = "pyarrow", version = "18.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, - { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, + { name = "pyarrow" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] @@ -265,6 +264,7 @@ dev = [ { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" }, { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" }, { name = "pre-commit" }, + { name = "pyarrow" }, { name = "pygithub" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -298,6 +298,7 @@ dev = [ { name = "numpy", marker = "python_full_version < '3.14'", specifier = ">1.25.0" }, { name = "numpy", marker = "python_full_version >= '3.14'", specifier = ">=2.3.2" }, { name = "pre-commit", specifier = ">=4.3.0" }, + { name = "pyarrow", specifier = ">=19.0.0" }, { name = "pygithub", specifier = "==2.5.0" }, { name = "pytest", specifier = ">=7.4.4" }, { name = "pytest-asyncio", specifier = ">=0.23.3" }, @@ -920,60 +921,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, ] -[[package]] -name = "pyarrow" -version = "18.1.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12' and python_full_version < '3.14'", - "python_full_version == '3.11.*'", - "python_full_version < '3.11'", -] -sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/bb/8d4a1573f66e0684f190dd2b55fd0b97a7214de8882d58a3867e777bf640/pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c", size = 29531620 }, - { url = "https://files.pythonhosted.org/packages/30/90/893acfad917533b624a97b9e498c0e8393908508a0a72d624fe935e632bf/pyarrow-18.1.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b516dad76f258a702f7ca0250885fc93d1fa5ac13ad51258e39d402bd9e2e1e4", size = 30836521 }, - { url = "https://files.pythonhosted.org/packages/a3/2a/526545a7464b5fb2fa6e2c4bad16ca90e59e1843025c534fd907b7f73e5a/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f443122c8e31f4c9199cb23dca29ab9427cef990f283f80fe15b8e124bcc49b", size = 39213905 }, - { url = "https://files.pythonhosted.org/packages/8a/77/4b3fab91a30e19e233e738d0c5eca5a8f6dd05758bc349a2ca262c65de79/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0a03da7f2758645d17b7b4f83c8bffeae5bbb7f974523fe901f36288d2eab71", size = 40128881 }, - { url = "https://files.pythonhosted.org/packages/aa/e2/a88e16c5e45e562449c52305bd3bc2f9d704295322d3434656e7ccac1444/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ba17845efe3aa358ec266cf9cc2800fa73038211fb27968bfa88acd09261a470", size = 38627517 }, - { url = "https://files.pythonhosted.org/packages/6d/84/8037c20005ccc7b869726465be0957bd9c29cfc88612962030f08292ad06/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:3c35813c11a059056a22a3bef520461310f2f7eea5c8a11ef9de7062a23f8d56", size = 40060187 }, - { url = "https://files.pythonhosted.org/packages/2a/38/d6435c723ff73df8ae74626ea778262fbcc2b9b0d1a4f3db915b61711b05/pyarrow-18.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9736ba3c85129d72aefa21b4f3bd715bc4190fe4426715abfff90481e7d00812", size = 25118314 }, - { url = "https://files.pythonhosted.org/packages/9e/4d/a4988e7d82f4fbc797715db4185939a658eeffb07a25bab7262bed1ea076/pyarrow-18.1.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:eaeabf638408de2772ce3d7793b2668d4bb93807deed1725413b70e3156a7854", size = 29554860 }, - { url = "https://files.pythonhosted.org/packages/59/03/3a42c5c1e4bd4c900ab62aa1ff6b472bdb159ba8f1c3e5deadab7222244f/pyarrow-18.1.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:3b2e2239339c538f3464308fd345113f886ad031ef8266c6f004d49769bb074c", size = 30867076 }, - { url = "https://files.pythonhosted.org/packages/75/7e/332055ac913373e89256dce9d14b7708f55f7bd5be631456c897f0237738/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f39a2e0ed32a0970e4e46c262753417a60c43a3246972cfc2d3eb85aedd01b21", size = 39212135 }, - { url = "https://files.pythonhosted.org/packages/8c/64/5099cdb325828722ef7ffeba9a4696f238eb0cdeae227f831c2d77fcf1bd/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e31e9417ba9c42627574bdbfeada7217ad8a4cbbe45b9d6bdd4b62abbca4c6f6", size = 40125195 }, - { url = "https://files.pythonhosted.org/packages/83/88/1938d783727db1b178ff71bc6a6143d7939e406db83a9ec23cad3dad325c/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:01c034b576ce0eef554f7c3d8c341714954be9b3f5d5bc7117006b85fcf302fe", size = 38641884 }, - { url = "https://files.pythonhosted.org/packages/5e/b5/9e14e9f7590e0eaa435ecea84dabb137284a4dbba7b3c337b58b65b76d95/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f266a2c0fc31995a06ebd30bcfdb7f615d7278035ec5b1cd71c48d56daaf30b0", size = 40076877 }, - { url = "https://files.pythonhosted.org/packages/4d/a3/817ac7fe0891a2d66e247e223080f3a6a262d8aefd77e11e8c27e6acf4e1/pyarrow-18.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:d4f13eee18433f99adefaeb7e01d83b59f73360c231d4782d9ddfaf1c3fbde0a", size = 25119811 }, - { url = "https://files.pythonhosted.org/packages/6a/50/12829e7111b932581e51dda51d5cb39207a056c30fe31ef43f14c63c4d7e/pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d", size = 29514620 }, - { url = "https://files.pythonhosted.org/packages/d1/41/468c944eab157702e96abab3d07b48b8424927d4933541ab43788bb6964d/pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee", size = 30856494 }, - { url = "https://files.pythonhosted.org/packages/68/f9/29fb659b390312a7345aeb858a9d9c157552a8852522f2c8bad437c29c0a/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992", size = 39203624 }, - { url = "https://files.pythonhosted.org/packages/6e/f6/19360dae44200e35753c5c2889dc478154cd78e61b1f738514c9f131734d/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54", size = 40139341 }, - { url = "https://files.pythonhosted.org/packages/bb/e6/9b3afbbcf10cc724312e824af94a2e993d8ace22994d823f5c35324cebf5/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33", size = 38618629 }, - { url = "https://files.pythonhosted.org/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30", size = 40078661 }, - { url = "https://files.pythonhosted.org/packages/76/52/f8da04195000099d394012b8d42c503d7041b79f778d854f410e5f05049a/pyarrow-18.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99", size = 25092330 }, - { url = "https://files.pythonhosted.org/packages/cb/87/aa4d249732edef6ad88899399047d7e49311a55749d3c373007d034ee471/pyarrow-18.1.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b", size = 29497406 }, - { url = "https://files.pythonhosted.org/packages/3c/c7/ed6adb46d93a3177540e228b5ca30d99fc8ea3b13bdb88b6f8b6467e2cb7/pyarrow-18.1.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2", size = 30835095 }, - { url = "https://files.pythonhosted.org/packages/41/d7/ed85001edfb96200ff606943cff71d64f91926ab42828676c0fc0db98963/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191", size = 39194527 }, - { url = "https://files.pythonhosted.org/packages/59/16/35e28eab126342fa391593415d79477e89582de411bb95232f28b131a769/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa", size = 40131443 }, - { url = "https://files.pythonhosted.org/packages/0c/95/e855880614c8da20f4cd74fa85d7268c725cf0013dc754048593a38896a0/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c", size = 38608750 }, - { url = "https://files.pythonhosted.org/packages/54/9d/f253554b1457d4fdb3831b7bd5f8f00f1795585a606eabf6fec0a58a9c38/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c", size = 40066690 }, - { url = "https://files.pythonhosted.org/packages/2f/58/8912a2563e6b8273e8aa7b605a345bba5a06204549826f6493065575ebc0/pyarrow-18.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181", size = 25081054 }, - { url = "https://files.pythonhosted.org/packages/82/f9/d06ddc06cab1ada0c2f2fd205ac8c25c2701182de1b9c4bf7a0a44844431/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc", size = 29525542 }, - { url = "https://files.pythonhosted.org/packages/ab/94/8917e3b961810587ecbdaa417f8ebac0abb25105ae667b7aa11c05876976/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386", size = 30829412 }, - { url = "https://files.pythonhosted.org/packages/5e/e3/3b16c3190f3d71d3b10f6758d2d5f7779ef008c4fd367cedab3ed178a9f7/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324", size = 39119106 }, - { url = "https://files.pythonhosted.org/packages/1d/d6/5d704b0d25c3c79532f8c0639f253ec2803b897100f64bcb3f53ced236e5/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8", size = 40090940 }, - { url = "https://files.pythonhosted.org/packages/37/29/366bc7e588220d74ec00e497ac6710c2833c9176f0372fe0286929b2d64c/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9", size = 38548177 }, - { url = "https://files.pythonhosted.org/packages/c8/11/fabf6ecabb1fe5b7d96889228ca2a9158c4c3bb732e3b8ee3f7f6d40b703/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba", size = 40043567 }, -] - [[package]] name = "pyarrow" version = "22.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14'", -] sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151 } wheels = [ { url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968 }, From f72e549694e3287b459495cfd7affb3b132589b3 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Wed, 4 Feb 2026 08:54:57 -0500 Subject: [PATCH 08/87] Cargo update to prepare for DF52 release (#1368) --- Cargo.lock | 579 ++++++++++----------- examples/datafusion-ffi-example/Cargo.lock | 348 ++++++------- 2 files changed, 444 insertions(+), 483 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1fb17610e..592a797bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -146,18 +146,18 @@ dependencies = [ [[package]] name = "ar_archive_writer" -version = "0.2.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" dependencies = [ "object", ] [[package]] name = "arc-swap" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d03449bb8ca2cc2ef70869af31463d1ae5ccc8fa3e334b307203fbf815207e" +checksum = "9ded5f9a03ac8f24d1b8a25101ee812cd32cdc8c50a4c50237de2c4915850e73" dependencies = [ "rustversion", ] @@ -176,9 +176,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" +checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e" dependencies = [ "arrow-arith", "arrow-array", @@ -198,9 +198,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" +checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372" dependencies = [ "arrow-array", "arrow-buffer", @@ -212,9 +212,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" +checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c" dependencies = [ "ahash", "arrow-buffer", @@ -231,9 +231,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56" dependencies = [ "bytes", "half", @@ -243,9 +243,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" +checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074" dependencies = [ "arrow-array", "arrow-buffer", @@ -265,9 +265,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" +checksum = "8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896" dependencies = [ "arrow-array", "arrow-cast", @@ -280,9 +280,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597" dependencies = [ "arrow-buffer", "arrow-schema", @@ -293,9 +293,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" +checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081" dependencies = [ "arrow-array", "arrow-buffer", @@ -309,9 +309,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" +checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297" dependencies = [ "arrow-array", "arrow-buffer", @@ -333,9 +333,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39" dependencies = [ "arrow-array", "arrow-buffer", @@ -346,9 +346,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbd810e3997bae72f58cda57231ccb0a2fda07911ca1b0a5718cbf9379abb297" +checksum = "f45c7989cb70214b2f362eaa10266d15e1a433692f2ea1514018be3aace679f4" dependencies = [ "arrow-array", "arrow-data", @@ -358,9 +358,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" +checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3" dependencies = [ "arrow-array", "arrow-buffer", @@ -371,9 +371,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c" dependencies = [ "bitflags", "serde_core", @@ -382,9 +382,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" +checksum = "c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b" dependencies = [ "ahash", "arrow-array", @@ -396,9 +396,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" +checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a" dependencies = [ "arrow-array", "arrow-buffer", @@ -425,13 +425,12 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.36" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98ec5f6c2f8bc326c994cb9e241cc257ddaba9afa8555a43cffbb5dd86efaa37" +checksum = "d10e4f991a553474232bc0a31799f6d24b034a84c0971d80d2e2f78b2e576e40" dependencies = [ "compression-codecs", "compression-core", - "futures-core", "pin-project-lite", "tokio", ] @@ -453,7 +452,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -464,7 +463,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -525,15 +524,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.8.2" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" +checksum = "2468ef7d57b3fb7e16b576e8377cdbde2320c60e1491e961d11da40fc4f02a2d" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", + "cpufeatures", ] [[package]] @@ -547,9 +547,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.8.1" +version = "3.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" +checksum = "234655ec178edd82b891e262ea7cf71f6584bcd09eff94db786be23f1821825c" dependencies = [ "bon-macros", "rustversion", @@ -557,9 +557,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.8.1" +version = "3.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" +checksum = "89ec27229c38ed0eb3c0feee3d2c1d6a4379ae44f418a29a658890e062d8f365" dependencies = [ "darling", "ident_case", @@ -567,7 +567,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -605,9 +605,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bzip2" @@ -620,9 +620,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.51" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -644,9 +644,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "num-traits", @@ -675,9 +675,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.2.1" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ "unicode-segmentation", "unicode-width", @@ -685,9 +685,9 @@ dependencies = [ [[package]] name = "compression-codecs" -version = "0.4.35" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0f7ac3e5b97fdce45e8922fb05cae2c37f7bbd63d30dd94821dacfd8f3f2bf2" +checksum = "00828ba6fd27b45a448e57dbfe84f1029d4c9f26b368157e9a448a5f49a2ec2a" dependencies = [ "bzip2", "compression-core", @@ -719,7 +719,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] @@ -735,9 +735,9 @@ dependencies = [ [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" [[package]] name = "core-foundation" @@ -852,9 +852,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" dependencies = [ "darling_core", "darling_macro", @@ -862,27 +862,26 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" dependencies = [ - "fnv", "ident_case", "proc-macro2", "quote", "strsim", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] name = "darling_macro" -version = "0.21.3" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" dependencies = [ "darling_core", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -901,9 +900,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f02e9a7e70f214e5282db11c8effba173f4e25a00977e520c6b811817e3a082b" +checksum = "d12ee9fdc6cdb5898c7691bb994f0ba606c4acc93a2258d78bb9f26ff8158bb3" dependencies = [ "arrow", "arrow-schema", @@ -957,9 +956,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3e91b2603f906cf8cb8be84ba4e34f9d8fe6dbdfdd6916d55f22317074d1fdf" +checksum = "462dc9ef45e5d688aeaae49a7e310587e81b6016b9d03bace5626ad0043e5a9e" dependencies = [ "arrow", "async-trait", @@ -982,9 +981,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d20cdebddee4d8dca651aa0291a44c8104824d1ac288996a325c319ce31ba" +checksum = "1b96dbf1d728fc321817b744eb5080cdd75312faa6980b338817f68f3caa4208" dependencies = [ "arrow", "async-trait", @@ -1005,9 +1004,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31ff2c4e95be40ad954de93862167b165a6fb49248bb882dea8aef4f888bc767" +checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" dependencies = [ "ahash", "apache-avro", @@ -1030,9 +1029,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd9f820fe58c2600b6c33a14432228dbaaf233b96c83a1fd61f16d073d5c3c5" +checksum = "70b5e34026af55a1bfccb1ef0a763cf1f64e77c696ffcf5a128a278c31236528" dependencies = [ "futures", "log", @@ -1041,9 +1040,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b32b7b12645805d20b70aba6ba846cd262d7b073f7f617640c3294af108d44" +checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" dependencies = [ "arrow", "async-compression", @@ -1076,9 +1075,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597695c8ebb723ee927b286139d43a3fbed6de7ad9210bd1a9fed5c721ac6fb1" +checksum = "1739b9b07c9236389e09c74f770e88aff7055250774e9def7d3f4f56b3dcc7be" dependencies = [ "arrow", "arrow-ipc", @@ -1100,9 +1099,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c94347a431bac4bcb8408abb1cb5d40dab41f8d59c39db32f8f59e08144875f" +checksum = "828088c2fb681cc0e06fb42f541f76c82a0c10278f9fd6334e22c8d1e3574ee7" dependencies = [ "apache-avro", "arrow", @@ -1120,9 +1119,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb493d07d8da6d00a89ea9cc3e74a56795076d9faed5ac30284bd9ef37929e9" +checksum = "61c73bc54b518bbba7c7650299d07d58730293cfba4356f6f428cc94c20b7600" dependencies = [ "arrow", "async-trait", @@ -1143,9 +1142,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9806521c4d3632f53b9a664041813c267c670232efa1452ef29faee71c3749" +checksum = "37812c8494c698c4d889374ecfabbff780f1f26d9ec095dd1bddfc2a8ca12559" dependencies = [ "arrow", "async-trait", @@ -1165,9 +1164,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6a3ccd48d5034f8461f522114d0e46dfb3a9f0ce01a4d53a721024ace95d60d" +checksum = "2210937ecd9f0e824c397e73f4b5385c97cd1aff43ab2b5836fcfd2d321523fb" dependencies = [ "arrow", "async-trait", @@ -1195,15 +1194,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff69a18418e9878d4840f35e2ad7f2a6386beedf192e9f065e628a7295ff5fbf" +checksum = "2c825f969126bc2ef6a6a02d94b3c07abff871acf4d6dd759ce1255edb7923ce" [[package]] name = "datafusion-execution" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccbc5e469b35d87c0b115327be83d68356ef9154684d32566315b5c071577e23" +checksum = "fa03ef05a2c2f90dd6c743e3e111078e322f4b395d20d4b4d431a245d79521ae" dependencies = [ "arrow", "async-trait", @@ -1222,9 +1221,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ed3c02a3faf4e09356d5a314471703f440f0a6a14ca6addaf6cfb44ab14de5" +checksum = "ef33934c1f98ee695cc51192cc5f9ed3a8febee84fdbcd9131bf9d3a9a78276f" dependencies = [ "arrow", "async-trait", @@ -1245,9 +1244,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1567e60d21c372ca766dc9dde98efabe2b06d98f008d988fed00d93546bf5be7" +checksum = "000c98206e3dd47d2939a94b6c67af4bfa6732dd668ac4fafdbde408fd9134ea" dependencies = [ "arrow", "datafusion-common", @@ -1258,9 +1257,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3ec8492849520101bee44489da8c49f2df3c34d55fd6e502f8083e66a30cdff" +checksum = "30f57f7f63a25a0b78b3f2a5e18c0ecbd54851b64064ac0d5a9eb05efd5586d2" dependencies = [ "abi_stable", "arrow", @@ -1288,9 +1287,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4593538abd95c27eeeb2f86b7ad827cce07d0c474eae9b122f4f9675f8c20ad" +checksum = "379b01418ab95ca947014066248c22139fe9af9289354de10b445bd000d5d276" dependencies = [ "arrow", "arrow-buffer", @@ -1319,9 +1318,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81cdf609f43cd26156934fd81beb7215d60dda40a776c2e1b83d73df69434f2" +checksum = "fd00d5454ba4c3f8ebbd04bd6a6a9dc7ced7c56d883f70f2076c188be8459e4c" dependencies = [ "ahash", "arrow", @@ -1340,9 +1339,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9173f1bcea2ede4a5c23630a48469f06c9db9a408eb5fd140d1ff9a5e0c40ebf" +checksum = "aec06b380729a87210a4e11f555ec2d729a328142253f8d557b87593622ecc9f" dependencies = [ "ahash", "arrow", @@ -1353,9 +1352,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d0b9f32e7735a3b94ae8b9596d89080dc63dd139029a91133be370da099490d" +checksum = "904f48d45e0f1eb7d0eb5c0f80f2b5c6046a85454364a6b16a2e0b46f62e7dff" dependencies = [ "arrow", "arrow-ord", @@ -1376,9 +1375,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a29e8a6201b3b9fb2be17d88e287c6d427948d64220cd5ea72ced614a1aee5" +checksum = "e9a0d20e2b887e11bee24f7734d780a2588b925796ac741c3118dd06d5aa77f0" dependencies = [ "arrow", "async-trait", @@ -1392,9 +1391,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd412754964a31c515e5a814e5ce0edaf30f0ea975f3691e800eff115ee76dfb" +checksum = "d3414b0a07e39b6979fe3a69c7aa79a9f1369f1d5c8e52146e66058be1b285ee" dependencies = [ "arrow", "datafusion-common", @@ -1410,9 +1409,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d49be73a5ac0797398927a543118bd68e58e80bf95ebdabc77336bcd9c38a711" +checksum = "5bf2feae63cd4754e31add64ce75cae07d015bce4bb41cd09872f93add32523a" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1420,20 +1419,20 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ff5489dcac4d34ed7a49a93310c3345018c4469e34726fa471cdda725346d" +checksum = "c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] name = "datafusion-optimizer" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a80bb7de8ff5a9948799bc7749c292eac5c629385cdb582893ef2d80b6e718c4" +checksum = "8a6527c063ae305c11be397a86d8193936f4b84d137fe40bd706dfc178cf733c" dependencies = [ "arrow", "chrono", @@ -1451,9 +1450,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83480008f66691a0047c5a88990bd76b7c1117dd8a49ca79959e214948b81f0a" +checksum = "0bb028323dd4efd049dd8a78d78fe81b2b969447b39c51424167f973ac5811d9" dependencies = [ "ahash", "arrow", @@ -1468,16 +1467,16 @@ dependencies = [ "itertools", "parking_lot", "paste", - "petgraph 0.8.3", + "petgraph", "recursive", "tokio", ] [[package]] name = "datafusion-physical-expr-adapter" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b438306446646b359666a658cc29d5494b1e9873bc7a57707689760666fc82c" +checksum = "78fe0826aef7eab6b4b61533d811234a7a9e5e458331ebbf94152a51fc8ab433" dependencies = [ "arrow", "datafusion-common", @@ -1490,9 +1489,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95b1fbf739038e0b313473588331c5bf79985d1b842b9937c1f10b170665cae1" +checksum = "cfccd388620734c661bd8b7ca93c44cdd59fecc9b550eea416a78ffcbb29475f" dependencies = [ "ahash", "arrow", @@ -1507,9 +1506,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4cd3a170faa0f1de04bd4365ccfe309056746dd802ed276e8787ccb8e8a0d4" +checksum = "bde5fa10e73259a03b705d5fddc136516814ab5f441b939525618a4070f5a059" dependencies = [ "arrow", "datafusion-common", @@ -1526,9 +1525,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a616a72b4ddf550652b36d5a7c0386eac4accea3ffc6c29a7b16c45f237e9882" +checksum = "0e1098760fb29127c24cc9ade3277051dc73c9ed0ac0131bd7bcd742e0ad7470" dependencies = [ "ahash", "arrow", @@ -1557,9 +1556,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0a96a57f60a53041c6d275dc46313bbe34f07ed51c4cea3557b321d4f3da7e0" +checksum = "0cf75daf56aa6b1c6867cc33ff0fb035d517d6d06737fd355a3e1ef67cba6e7a" dependencies = [ "arrow", "chrono", @@ -1584,9 +1583,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f4c54f4a47608d2ef5af2fbaca9975cac9e48b93acf41b2ccf584405a3da5e" +checksum = "12a0cb3cce232a3de0d14ef44b58a6537aeb1362cfb6cf4d808691ddbb918956" dependencies = [ "arrow", "datafusion-common", @@ -1595,9 +1594,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bf4b50be3ab65650452993eda4baf81edb245fb039b8714476b0f4c8801a527" +checksum = "64d0fef4201777b52951edec086c21a5b246f3c82621569ddb4a26f488bc38a9" dependencies = [ "arrow", "datafusion-common", @@ -1640,9 +1639,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66e080e2c105284460580c18e751b2133cc306df298181e4349b5b134632811a" +checksum = "f71f1e39e8f2acbf1c63b0e93756c2e970a64729dab70ac789587d6237c4fde0" dependencies = [ "async-trait", "datafusion-common", @@ -1654,9 +1653,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dac502db772ff9bffc2ceae321963091982e8d5f5dfcb877e8dc66fc9a093cc" +checksum = "f44693cfcaeb7a9f12d71d1c576c3a6dc025a12cef209375fa2d16fb3b5670ee" dependencies = [ "arrow", "bigdecimal", @@ -1672,9 +1671,9 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd76fd28a2804bf3d757ee19bfba8f818b3361157f0cdfa314609ad64f249b75" +checksum = "6042adacd0bd64e56c22f6a7f9ce0ce1793dd367c899d868179d029f110d9215" dependencies = [ "async-recursion", "async-trait", @@ -1710,7 +1709,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -1749,9 +1748,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.6" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -1771,13 +1770,13 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", - "libz-rs-sys", "miniz_oxide", + "zlib-rs", ] [[package]] @@ -1863,7 +1862,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -1917,9 +1916,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", @@ -1950,9 +1949,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "h2" -version = "0.4.12" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" dependencies = [ "atomic-waker", "bytes", @@ -2103,14 +2102,13 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.19" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64", "bytes", "futures-channel", - "futures-core", "futures-util", "http", "http-body", @@ -2127,9 +2125,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -2259,9 +2257,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" dependencies = [ "equivalent", "hashbrown 0.16.1", @@ -2325,9 +2323,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -2398,9 +2396,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.179" +version = "0.2.180" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5a2d376baa530d1238d133232d15e239abad80d05838b4b59354e5268af431f" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" [[package]] name = "libloading" @@ -2423,9 +2421,9 @@ dependencies = [ [[package]] name = "liblzma-sys" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b9596486f6d60c3bbe644c0e1be1aa6ccc472ad630fe8927b456973d7cb736" +checksum = "9f2db66f3268487b5033077f266da6777d057949b8f93c8ad82e441df25e6186" dependencies = [ "cc", "libc", @@ -2434,9 +2432,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libmimalloc-sys" @@ -2448,15 +2446,6 @@ dependencies = [ "libc", ] -[[package]] -name = "libz-rs-sys" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" -dependencies = [ - "zlib-rs", -] - [[package]] name = "linux-raw-sys" version = "0.11.0" @@ -2601,18 +2590,18 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.37.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] [[package]] name = "object_store" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", "base64", @@ -2654,9 +2643,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl-probe" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f50d9b3dabb09ecd771ad0aa242ca6894994c130308ca3d7684634df8037391" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "ordered-float" @@ -2692,9 +2681,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" dependencies = [ "ahash", "arrow-array", @@ -2776,16 +2765,6 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" -[[package]] -name = "petgraph" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" -dependencies = [ - "fixedbitset", - "indexmap", -] - [[package]] name = "petgraph" version = "0.8.3" @@ -2836,9 +2815,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -2865,23 +2844,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] name = "proc-macro2" -version = "1.0.104" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -2889,42 +2868,41 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac6c3320f9abac597dcbc668774ef006702672474aad53c6d596b62e487b40b1" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", "itertools", "log", "multimap", - "once_cell", - "petgraph 0.7.1", + "petgraph", "prettyplease", "prost", "prost-types", "regex", - "syn 2.0.113", + "syn 2.0.114", "tempfile", ] [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] name = "prost-types" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" dependencies = [ "prost", ] @@ -2940,9 +2918,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.28" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" +checksum = "1fa96cb91275ed31d6da3e983447320c4eb219ac180fa1679a0889ff32861e2d" dependencies = [ "ar_archive_writer", "cc", @@ -3017,7 +2995,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3030,7 +3008,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3106,9 +3084,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.42" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -3141,9 +3119,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -3165,7 +3143,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3179,9 +3157,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -3191,9 +3169,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -3202,15 +3180,15 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" +checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "regress" @@ -3281,7 +3259,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -3317,9 +3295,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.35" +version = "0.23.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +checksum = "c665f33d38cea657d9614f766881e4d510e0eda4239891eea56b4cadcf01801b" dependencies = [ "once_cell", "ring", @@ -3352,9 +3330,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" +checksum = "be040f8b0a225e40375822a563fa9524378b9d63112f53e19ffff34df5d33fdd" dependencies = [ "web-time", "zeroize", @@ -3362,9 +3340,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.8" +version = "0.103.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +checksum = "d7df23109aa6c1567d1c575b9952556388da57401e4ace1d15f79eedad0d8f53" dependencies = [ "ring", "rustls-pki-types", @@ -3422,7 +3400,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3507,7 +3485,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3518,14 +3496,14 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] name = "serde_json" -version = "1.0.148" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3084b546a1dd6289475996f182a22aba973866ea8e8b02c51d9f46b1336a22da" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", @@ -3543,7 +3521,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3602,15 +3580,15 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -3626,9 +3604,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "86f4aa3ad99f2088c990dfa82d367e19cb29268ed67c574d10d0a4bfe71f07e0" dependencies = [ "libc", "windows-sys 0.60.2", @@ -3653,7 +3631,7 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3696,7 +3674,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3720,7 +3698,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.113", + "syn 2.0.114", "typify", "walkdir", ] @@ -3744,9 +3722,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.113" +version = "2.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678faa00651c9eb72dd2020cbdf275d92eccb2400d568e419efdd64838145cb4" +checksum = "d4d107df263a3013ef9b1879b0df87d706ff80f65a86ea879bd9c31f9b307c2a" dependencies = [ "proc-macro2", "quote", @@ -3770,7 +3748,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3794,22 +3772,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3880,7 +3858,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -3908,9 +3886,9 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -3970,7 +3948,7 @@ checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -4052,7 +4030,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.113", + "syn 2.0.114", "thiserror", "unicode-ident", ] @@ -4070,7 +4048,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.113", + "syn 2.0.114", "typify-impl", ] @@ -4112,9 +4090,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -4130,9 +4108,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" dependencies = [ "getrandom 0.3.4", "js-sys", @@ -4173,18 +4151,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -4195,11 +4173,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -4208,9 +4187,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4218,22 +4197,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] @@ -4253,9 +4232,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -4323,7 +4302,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -4334,7 +4313,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -4528,9 +4507,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "writeable" @@ -4557,28 +4536,28 @@ checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", "synstructure", ] [[package]] name = "zerocopy" -version = "0.8.31" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" +checksum = "57cf3aa6855b23711ee9852dfc97dfaa51c45feaba5b645d0c777414d494a961" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.31" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" +checksum = "8a616990af1a287837c4fe6596ad77ef57948f787e46ce28e166facc0cc1cb75" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] @@ -4598,7 +4577,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", "synstructure", ] @@ -4638,20 +4617,20 @@ checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.113", + "syn 2.0.114", ] [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "a7948af682ccbc3342b6e9420e8c51c1fe5d7bf7756002b4a3c6cabfe96a7e3c" [[package]] name = "zmij" -version = "1.0.10" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e0d8dffbae3d840f64bda38e28391faef673a7b5a6017840f2a106c8145868" +checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" [[package]] name = "zstd" diff --git a/examples/datafusion-ffi-example/Cargo.lock b/examples/datafusion-ffi-example/Cargo.lock index 383716d41..f1a75e8c6 100644 --- a/examples/datafusion-ffi-example/Cargo.lock +++ b/examples/datafusion-ffi-example/Cargo.lock @@ -117,9 +117,9 @@ checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" [[package]] name = "arrow" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3" +checksum = "2a2b10dcb159faf30d3f81f6d56c1211a5bea2ca424eabe477648a44b993320e" dependencies = [ "arrow-arith", "arrow-array", @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049" +checksum = "288015089e7931843c80ed4032c5274f02b37bcb720c4a42096d50b390e70372" dependencies = [ "arrow-array", "arrow-buffer", @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002" +checksum = "65ca404ea6191e06bf30956394173337fa9c35f445bd447fe6c21ab944e1a23c" dependencies = [ "ahash", "arrow-buffer", @@ -171,9 +171,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626" +checksum = "36356383099be0151dacc4245309895f16ba7917d79bdb71a7148659c9206c56" dependencies = [ "bytes", "half", @@ -183,9 +183,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271" +checksum = "9c8e372ed52bd4ee88cc1e6c3859aa7ecea204158ac640b10e187936e7e87074" dependencies = [ "arrow-array", "arrow-buffer", @@ -205,9 +205,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b" +checksum = "8e4100b729fe656f2e4fb32bc5884f14acf9118d4ad532b7b33c1132e4dce896" dependencies = [ "arrow-array", "arrow-cast", @@ -220,9 +220,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3" +checksum = "bf87f4ff5fc13290aa47e499a8b669a82c5977c6a1fedce22c7f542c1fd5a597" dependencies = [ "arrow-buffer", "arrow-schema", @@ -233,9 +233,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3" +checksum = "eb3ca63edd2073fcb42ba112f8ae165df1de935627ead6e203d07c99445f2081" dependencies = [ "arrow-array", "arrow-buffer", @@ -248,9 +248,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0" +checksum = "a36b2332559d3310ebe3e173f75b29989b4412df4029a26a30cc3f7da0869297" dependencies = [ "arrow-array", "arrow-buffer", @@ -272,9 +272,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521" +checksum = "13c4e0530272ca755d6814218dffd04425c5b7854b87fa741d5ff848bf50aa39" dependencies = [ "arrow-array", "arrow-buffer", @@ -285,9 +285,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f" +checksum = "b07f52788744cc71c4628567ad834cadbaeb9f09026ff1d7a4120f69edf7abd3" dependencies = [ "arrow-array", "arrow-buffer", @@ -298,18 +298,18 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0" +checksum = "6bb63203e8e0e54b288d0d8043ca8fa1013820822a27692ef1b78a977d879f2c" dependencies = [ "bitflags", ] [[package]] name = "arrow-select" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010" +checksum = "c96d8a1c180b44ecf2e66c9a2f2bbcb8b1b6f14e165ce46ac8bde211a363411b" dependencies = [ "ahash", "arrow-array", @@ -321,9 +321,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534" +checksum = "a8ad6a81add9d3ea30bf8374ee8329992c7fd246ffd8b7e2f48a3cea5aa0cc9a" dependencies = [ "arrow-array", "arrow-buffer", @@ -430,15 +430,15 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" -version = "1.2.52" +version = "1.2.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd4932aefd12402b36c60956a4fe0035421f544799057659ff86f923657aada3" +checksum = "47b26a0954ae34af09b50f0de26458fa95369a0d478d8236d3f93082b219bd29" dependencies = [ "find-msvc-tools", "jobserver", @@ -454,9 +454,9 @@ checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "chrono" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ "iana-time-zone", "num-traits", @@ -475,9 +475,9 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.2.1" +version = "7.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" +checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47" dependencies = [ "unicode-segmentation", "unicode-width", @@ -498,7 +498,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "once_cell", "tiny-keccak", ] @@ -533,15 +533,6 @@ version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "533d38ecd2709b7608fb8e18e4504deb99e9a72879e6aa66373a76d8dc4259ea" -[[package]] -name = "crc32fast" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" -dependencies = [ - "cfg-if", -] - [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -600,9 +591,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3e91b2603f906cf8cb8be84ba4e34f9d8fe6dbdfdd6916d55f22317074d1fdf" +checksum = "462dc9ef45e5d688aeaae49a7e310587e81b6016b9d03bace5626ad0043e5a9e" dependencies = [ "arrow", "async-trait", @@ -625,9 +616,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d20cdebddee4d8dca651aa0291a44c8104824d1ac288996a325c319ce31ba" +checksum = "1b96dbf1d728fc321817b744eb5080cdd75312faa6980b338817f68f3caa4208" dependencies = [ "arrow", "async-trait", @@ -648,9 +639,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31ff2c4e95be40ad954de93862167b165a6fb49248bb882dea8aef4f888bc767" +checksum = "3237a6ff0d2149af4631290074289cae548c9863c885d821315d54c6673a074a" dependencies = [ "ahash", "arrow", @@ -670,9 +661,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd9f820fe58c2600b6c33a14432228dbaaf233b96c83a1fd61f16d073d5c3c5" +checksum = "70b5e34026af55a1bfccb1ef0a763cf1f64e77c696ffcf5a128a278c31236528" dependencies = [ "futures", "log", @@ -681,9 +672,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b32b7b12645805d20b70aba6ba846cd262d7b073f7f617640c3294af108d44" +checksum = "1b2a6be734cc3785e18bbf2a7f2b22537f6b9fb960d79617775a51568c281842" dependencies = [ "arrow", "async-trait", @@ -710,9 +701,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597695c8ebb723ee927b286139d43a3fbed6de7ad9210bd1a9fed5c721ac6fb1" +checksum = "1739b9b07c9236389e09c74f770e88aff7055250774e9def7d3f4f56b3dcc7be" dependencies = [ "arrow", "arrow-ipc", @@ -734,9 +725,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb493d07d8da6d00a89ea9cc3e74a56795076d9faed5ac30284bd9ef37929e9" +checksum = "61c73bc54b518bbba7c7650299d07d58730293cfba4356f6f428cc94c20b7600" dependencies = [ "arrow", "async-trait", @@ -757,9 +748,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9806521c4d3632f53b9a664041813c267c670232efa1452ef29faee71c3749" +checksum = "37812c8494c698c4d889374ecfabbff780f1f26d9ec095dd1bddfc2a8ca12559" dependencies = [ "arrow", "async-trait", @@ -779,9 +770,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6a3ccd48d5034f8461f522114d0e46dfb3a9f0ce01a4d53a721024ace95d60d" +checksum = "2210937ecd9f0e824c397e73f4b5385c97cd1aff43ab2b5836fcfd2d321523fb" dependencies = [ "arrow", "async-trait", @@ -809,15 +800,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff69a18418e9878d4840f35e2ad7f2a6386beedf192e9f065e628a7295ff5fbf" +checksum = "2c825f969126bc2ef6a6a02d94b3c07abff871acf4d6dd759ce1255edb7923ce" [[package]] name = "datafusion-execution" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccbc5e469b35d87c0b115327be83d68356ef9154684d32566315b5c071577e23" +checksum = "fa03ef05a2c2f90dd6c743e3e111078e322f4b395d20d4b4d431a245d79521ae" dependencies = [ "arrow", "async-trait", @@ -836,9 +827,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81ed3c02a3faf4e09356d5a314471703f440f0a6a14ca6addaf6cfb44ab14de5" +checksum = "ef33934c1f98ee695cc51192cc5f9ed3a8febee84fdbcd9131bf9d3a9a78276f" dependencies = [ "arrow", "async-trait", @@ -858,9 +849,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1567e60d21c372ca766dc9dde98efabe2b06d98f008d988fed00d93546bf5be7" +checksum = "000c98206e3dd47d2939a94b6c67af4bfa6732dd668ac4fafdbde408fd9134ea" dependencies = [ "arrow", "datafusion-common", @@ -871,9 +862,9 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3ec8492849520101bee44489da8c49f2df3c34d55fd6e502f8083e66a30cdff" +checksum = "30f57f7f63a25a0b78b3f2a5e18c0ecbd54851b64064ac0d5a9eb05efd5586d2" dependencies = [ "abi_stable", "arrow", @@ -919,9 +910,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4593538abd95c27eeeb2f86b7ad827cce07d0c474eae9b122f4f9675f8c20ad" +checksum = "379b01418ab95ca947014066248c22139fe9af9289354de10b445bd000d5d276" dependencies = [ "arrow", "arrow-buffer", @@ -946,9 +937,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81cdf609f43cd26156934fd81beb7215d60dda40a776c2e1b83d73df69434f2" +checksum = "fd00d5454ba4c3f8ebbd04bd6a6a9dc7ced7c56d883f70f2076c188be8459e4c" dependencies = [ "ahash", "arrow", @@ -967,9 +958,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9173f1bcea2ede4a5c23630a48469f06c9db9a408eb5fd140d1ff9a5e0c40ebf" +checksum = "aec06b380729a87210a4e11f555ec2d729a328142253f8d557b87593622ecc9f" dependencies = [ "ahash", "arrow", @@ -980,9 +971,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a29e8a6201b3b9fb2be17d88e287c6d427948d64220cd5ea72ced614a1aee5" +checksum = "e9a0d20e2b887e11bee24f7734d780a2588b925796ac741c3118dd06d5aa77f0" dependencies = [ "arrow", "async-trait", @@ -996,9 +987,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd412754964a31c515e5a814e5ce0edaf30f0ea975f3691e800eff115ee76dfb" +checksum = "d3414b0a07e39b6979fe3a69c7aa79a9f1369f1d5c8e52146e66058be1b285ee" dependencies = [ "arrow", "datafusion-common", @@ -1014,9 +1005,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d49be73a5ac0797398927a543118bd68e58e80bf95ebdabc77336bcd9c38a711" +checksum = "5bf2feae63cd4754e31add64ce75cae07d015bce4bb41cd09872f93add32523a" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1024,9 +1015,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ff5489dcac4d34ed7a49a93310c3345018c4469e34726fa471cdda725346d" +checksum = "c4fe888aeb6a095c4bcbe8ac1874c4b9a4c7ffa2ba849db7922683ba20875aaf" dependencies = [ "datafusion-doc", "quote", @@ -1035,9 +1026,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83480008f66691a0047c5a88990bd76b7c1117dd8a49ca79959e214948b81f0a" +checksum = "0bb028323dd4efd049dd8a78d78fe81b2b969447b39c51424167f973ac5811d9" dependencies = [ "ahash", "arrow", @@ -1058,9 +1049,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b438306446646b359666a658cc29d5494b1e9873bc7a57707689760666fc82c" +checksum = "78fe0826aef7eab6b4b61533d811234a7a9e5e458331ebbf94152a51fc8ab433" dependencies = [ "arrow", "datafusion-common", @@ -1073,9 +1064,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95b1fbf739038e0b313473588331c5bf79985d1b842b9937c1f10b170665cae1" +checksum = "cfccd388620734c661bd8b7ca93c44cdd59fecc9b550eea416a78ffcbb29475f" dependencies = [ "ahash", "arrow", @@ -1090,9 +1081,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a616a72b4ddf550652b36d5a7c0386eac4accea3ffc6c29a7b16c45f237e9882" +checksum = "0e1098760fb29127c24cc9ade3277051dc73c9ed0ac0131bd7bcd742e0ad7470" dependencies = [ "ahash", "arrow", @@ -1121,9 +1112,9 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0a96a57f60a53041c6d275dc46313bbe34f07ed51c4cea3557b321d4f3da7e0" +checksum = "0cf75daf56aa6b1c6867cc33ff0fb035d517d6d06737fd355a3e1ef67cba6e7a" dependencies = [ "arrow", "chrono", @@ -1148,9 +1139,9 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38f4c54f4a47608d2ef5af2fbaca9975cac9e48b93acf41b2ccf584405a3da5e" +checksum = "12a0cb3cce232a3de0d14ef44b58a6537aeb1362cfb6cf4d808691ddbb918956" dependencies = [ "arrow", "datafusion-common", @@ -1159,9 +1150,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bf4b50be3ab65650452993eda4baf81edb245fb039b8714476b0f4c8801a527" +checksum = "64d0fef4201777b52951edec086c21a5b246f3c82621569ddb4a26f488bc38a9" dependencies = [ "arrow", "datafusion-common", @@ -1176,9 +1167,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.0.0" +version = "52.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66e080e2c105284460580c18e751b2133cc306df298181e4349b5b134632811a" +checksum = "f71f1e39e8f2acbf1c63b0e93756c2e970a64729dab70ac789587d6237c4fde0" dependencies = [ "async-trait", "datafusion-common", @@ -1229,9 +1220,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "find-msvc-tools" -version = "0.1.7" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f449e6c6c08c865631d4890cfacf252b3d396c9bcc83adb6623cdb02a8336c41" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "fixedbitset" @@ -1251,13 +1242,12 @@ dependencies = [ [[package]] name = "flate2" -version = "1.1.5" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ - "crc32fast", - "libz-rs-sys", "miniz_oxide", + "zlib-rs", ] [[package]] @@ -1381,9 +1371,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "libc", @@ -1476,9 +1466,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "iana-time-zone" -version = "0.1.64" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -1652,9 +1642,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" +checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" dependencies = [ "once_cell", "wasm-bindgen", @@ -1735,18 +1725,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" - -[[package]] -name = "libz-rs-sys" -version = "0.5.5" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" -dependencies = [ - "zlib-rs", -] +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "linux-raw-sys" @@ -1849,9 +1830,9 @@ dependencies = [ [[package]] name = "object_store" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1be0c6c22ec0817cdc77d3842f721a17fd30ab6965001415b5402a74e6b740" +checksum = "fbfbfff40aeccab00ec8a910b57ca8ecf4319b335c542f2edcd19dd25a1e2a00" dependencies = [ "async-trait", "bytes", @@ -1911,9 +1892,9 @@ dependencies = [ [[package]] name = "parquet" -version = "57.1.0" +version = "57.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89" +checksum = "5f6a2926a30477c0b95fea6c28c3072712b139337a242c2cc64817bdc20a8854" dependencies = [ "ahash", "arrow-array", @@ -2008,9 +1989,9 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "portable-atomic" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89776e4d69bb58bc6993e99ffa1d11f228b839984854c7daeb5d37f87cbe950" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "potential_utf" @@ -2032,18 +2013,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "535d180e0ecab6268a3e718bb9fd44db66bbbc256257165fc699dadf70d16fe7" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] name = "prost" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" dependencies = [ "bytes", "prost-derive", @@ -2051,9 +2032,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.1" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9120690fafc389a67ba3803df527d0ec9cbbc9cc45e4cc20b332996dfb672425" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", "itertools", @@ -2125,9 +2106,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc74d9a594b72ae6656596548f56f667211f8a97b3d4c3d467150794690dc40a" +checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" dependencies = [ "proc-macro2", ] @@ -2160,9 +2141,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -2178,9 +2159,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -2190,9 +2171,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -2201,9 +2182,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" [[package]] name = "repr_offset" @@ -2338,15 +2319,15 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "siphasher" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" +checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -2441,18 +2422,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -2619,9 +2600,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" +checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" dependencies = [ "getrandom 0.3.4", "js-sys", @@ -2652,18 +2633,18 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.2+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" +checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" dependencies = [ "cfg-if", "once_cell", @@ -2674,11 +2655,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.56" +version = "0.4.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" +checksum = "70a6e77fd0ae8029c9ea0063f87c46fde723e7d887703d74ad2616d792e51e6f" dependencies = [ "cfg-if", + "futures-util", "js-sys", "once_cell", "wasm-bindgen", @@ -2687,9 +2669,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" +checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2697,9 +2679,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" +checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" dependencies = [ "bumpalo", "proc-macro2", @@ -2710,18 +2692,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.106" +version = "0.2.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" +checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" -version = "0.3.83" +version = "0.3.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" +checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" dependencies = [ "js-sys", "wasm-bindgen", @@ -2838,9 +2820,9 @@ dependencies = [ [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" [[package]] name = "writeable" @@ -2873,18 +2855,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.33" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "668f5168d10b9ee831de31933dc111a459c97ec93225beb307aed970d1372dfd" +checksum = "57cf3aa6855b23711ee9852dfc97dfaa51c45feaba5b645d0c777414d494a961" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.33" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c7962b26b0a8685668b671ee4b54d007a67d4eaf05fda79ac0ecf41e32270f1" +checksum = "8a616990af1a287837c4fe6596ad77ef57948f787e46ce28e166facc0cc1cb75" dependencies = [ "proc-macro2", "quote", @@ -2947,15 +2929,15 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.5" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" +checksum = "a7948af682ccbc3342b6e9420e8c51c1fe5d7bf7756002b4a3c6cabfe96a7e3c" [[package]] name = "zmij" -version = "1.0.12" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fc5a66a20078bf1251bde995aa2fdcc4b800c70b5d92dd2c62abc5c60f679f8" +checksum = "3ff05f8caa9038894637571ae6b9e29466c1f4f829d26c9b28f869a29cbe3445" [[package]] name = "zstd" From adec2dcd785daa16a02f8029c1067c1d7594d67c Mon Sep 17 00:00:00 2001 From: Antoine Beyeler <49431240+abey79@users.noreply.github.com> Date: Wed, 4 Feb 2026 20:37:29 +0100 Subject: [PATCH 09/87] Improve displayed error by using `DataFusionError`'s `Display` trait (#1370) * Use instead of for * update test --- python/tests/test_indexing.py | 2 +- python/tests/test_udf.py | 2 +- src/errors.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/tests/test_indexing.py b/python/tests/test_indexing.py index 327decd2f..e4c60b55d 100644 --- a/python/tests/test_indexing.py +++ b/python/tests/test_indexing.py @@ -43,7 +43,7 @@ def test_err(df): with pytest.raises(Exception) as e_info: df["c"] - for e in ["SchemaError", "FieldNotFound", 'name: "c"']: + for e in ["Schema error", "No field named c"]: assert e in e_info.value.args[0] with pytest.raises(Exception) as e_info: diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py index c90668efe..c0ba1d831 100644 --- a/python/tests/test_udf.py +++ b/python/tests/test_udf.py @@ -207,4 +207,4 @@ def non_nullable_abs(input_col): with pytest.raises(Exception) as e_info: _results = df_result.collect() - assert "InvalidArgumentError" in str(e_info) + assert "Invalid argument error" in str(e_info) diff --git a/src/errors.rs b/src/errors.rs index fc079eb6c..d1b518042 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -39,7 +39,7 @@ pub enum PyDataFusionError { impl fmt::Display for PyDataFusionError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - PyDataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {e:?}"), + PyDataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {e}"), PyDataFusionError::ArrowError(e) => write!(f, "Arrow error: {e:?}"), PyDataFusionError::PythonError(e) => write!(f, "Python error {e:?}"), PyDataFusionError::Common(e) => write!(f, "{e}"), From ee62f7a26d178995a497c081625b8f5ccc5a82fd Mon Sep 17 00:00:00 2001 From: kosiew Date: Thu, 5 Feb 2026 20:09:25 +0800 Subject: [PATCH 10/87] Enforce DataFrame display memory limits with `max_rows` + `min_rows` constraint (deprecate `repr_rows`) (#1367) * Update DataFrameHtmlFormatter to enforce min_rows_display constraint and adjust default values * Refactor DataFrame formatter to replace repr_rows with max_rows and update related validations * Add validation for formatter parameters and deprecate repr_rows alias * Add boundary condition tests for HTML formatter memory limits and resolve max_rows logic * Remove repr_rows handling in max_rows resolution in Rust * Refactor whitespace in parameter validation and update test for HTML formatter memory limits * ruff fix * Rename min_rows_display to min_rows in formatter configuration and update related tests * Refactor function parameter handling and documentation Removed type annotations and redundant default values from parameter names. Enhanced descriptions for clarity and added context for usage. Fixed formatting for the documentation sections to improve readability. * Update HTML formatter memory boundary tests for large datasets * Enhance memory boundary tests in HTML formatter for large datasets * Add fixture for multi-batch DataFrame and test early stream termination with memory limits * Add backward compatibility tests for deprecated formatter attributes * ruff fix * Remove deprecation timeline comments from HTML formatter backward compatibility test --- .../source/user-guide/dataframe/rendering.rst | 8 +- python/datafusion/dataframe_formatter.py | 235 ++++++++++++---- python/tests/test_dataframe.py | 262 ++++++++++++++++-- src/dataframe.rs | 57 +++- 4 files changed, 474 insertions(+), 88 deletions(-) diff --git a/docs/source/user-guide/dataframe/rendering.rst b/docs/source/user-guide/dataframe/rendering.rst index 4c37c7471..9dea948bb 100644 --- a/docs/source/user-guide/dataframe/rendering.rst +++ b/docs/source/user-guide/dataframe/rendering.rst @@ -57,8 +57,8 @@ You can customize how DataFrames are rendered by configuring the formatter: max_width=1000, # Maximum width in pixels max_height=300, # Maximum height in pixels max_memory_bytes=2097152, # Maximum memory for rendering (2MB) - min_rows_display=20, # Minimum number of rows to display - repr_rows=10, # Number of rows to display in __repr__ + min_rows=10, # Minimum number of rows to display + max_rows=10, # Maximum rows to display in __repr__ enable_cell_expansion=True,# Allow expanding truncated cells custom_css=None, # Additional custom CSS show_truncation_message=True, # Show message when data is truncated @@ -190,8 +190,8 @@ You can control how much data is displayed and how much memory is used for rende configure_formatter( max_memory_bytes=4 * 1024 * 1024, # 4MB maximum memory for display - min_rows_display=50, # Always show at least 50 rows - repr_rows=20 # Show 20 rows in __repr__ output + min_rows=20, # Always show at least 20 rows + max_rows=50 # Show up to 50 rows in output ) These parameters help balance comprehensive data display against performance considerations. diff --git a/python/datafusion/dataframe_formatter.py b/python/datafusion/dataframe_formatter.py index bb53d323e..b8af45a1b 100644 --- a/python/datafusion/dataframe_formatter.py +++ b/python/datafusion/dataframe_formatter.py @@ -18,6 +18,7 @@ from __future__ import annotations +import warnings from typing import ( TYPE_CHECKING, Any, @@ -61,6 +62,93 @@ def _validate_bool(value: Any, param_name: str) -> None: raise TypeError(msg) +def _validate_formatter_parameters( + max_cell_length: int, + max_width: int, + max_height: int, + max_memory_bytes: int, + min_rows: int, + max_rows: int | None, + repr_rows: int | None, + enable_cell_expansion: bool, + show_truncation_message: bool, + use_shared_styles: bool, + custom_css: str | None, + style_provider: Any, +) -> int: + """Validate all formatter parameters and return resolved max_rows value. + + Args: + max_cell_length: Maximum cell length value to validate + max_width: Maximum width value to validate + max_height: Maximum height value to validate + max_memory_bytes: Maximum memory bytes value to validate + min_rows: Minimum rows to display value to validate + max_rows: Maximum rows value to validate (None means use default) + repr_rows: Deprecated repr_rows value to validate + enable_cell_expansion: Boolean expansion flag to validate + show_truncation_message: Boolean message flag to validate + use_shared_styles: Boolean styles flag to validate + custom_css: Custom CSS string to validate + style_provider: Style provider object to validate + + Returns: + The resolved max_rows value after handling repr_rows deprecation + + Raises: + ValueError: If any numeric parameter is invalid or constraints are violated + TypeError: If any parameter has invalid type + DeprecationWarning: If repr_rows parameter is used + """ + # Validate numeric parameters + _validate_positive_int(max_cell_length, "max_cell_length") + _validate_positive_int(max_width, "max_width") + _validate_positive_int(max_height, "max_height") + _validate_positive_int(max_memory_bytes, "max_memory_bytes") + _validate_positive_int(min_rows, "min_rows") + + # Handle deprecated repr_rows parameter + if repr_rows is not None: + warnings.warn( + "repr_rows parameter is deprecated, use max_rows instead", + DeprecationWarning, + stacklevel=4, + ) + _validate_positive_int(repr_rows, "repr_rows") + if max_rows is not None and repr_rows != max_rows: + msg = "Cannot specify both repr_rows and max_rows; use max_rows only" + raise ValueError(msg) + max_rows = repr_rows + + # Use default if max_rows was not provided + if max_rows is None: + max_rows = 10 + + _validate_positive_int(max_rows, "max_rows") + + # Validate constraint: min_rows <= max_rows + if min_rows > max_rows: + msg = "min_rows must be less than or equal to max_rows" + raise ValueError(msg) + + # Validate boolean parameters + _validate_bool(enable_cell_expansion, "enable_cell_expansion") + _validate_bool(show_truncation_message, "show_truncation_message") + _validate_bool(use_shared_styles, "use_shared_styles") + + # Validate custom_css + if custom_css is not None and not isinstance(custom_css, str): + msg = "custom_css must be None or a string" + raise TypeError(msg) + + # Validate style_provider + if style_provider is not None and not isinstance(style_provider, StyleProvider): + msg = "style_provider must implement the StyleProvider protocol" + raise TypeError(msg) + + return max_rows + + @runtime_checkable class CellFormatter(Protocol): """Protocol for cell value formatters.""" @@ -126,8 +214,9 @@ class DataFrameHtmlFormatter: max_width: Maximum width of the HTML table in pixels max_height: Maximum height of the HTML table in pixels max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) - min_rows_display: Minimum number of rows to display - repr_rows: Default number of rows to display in repr output + min_rows: Minimum number of rows to display (must be <= max_rows) + max_rows: Maximum number of rows to display in repr output + repr_rows: Deprecated alias for max_rows enable_cell_expansion: Whether to add expand/collapse buttons for long cell values custom_css: Additional CSS to include in the HTML output @@ -143,8 +232,9 @@ def __init__( max_width: int = 1000, max_height: int = 300, max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB - min_rows_display: int = 20, - repr_rows: int = 10, + min_rows: int = 10, + max_rows: int | None = None, + repr_rows: int | None = None, enable_cell_expansion: bool = True, custom_css: str | None = None, show_truncation_message: bool = True, @@ -155,71 +245,70 @@ def __init__( Parameters ---------- - max_cell_length : int, default 25 + max_cell_length Maximum length of cell content before truncation. - max_width : int, default 1000 + max_width Maximum width of the displayed table in pixels. - max_height : int, default 300 + max_height Maximum height of the displayed table in pixels. - max_memory_bytes : int, default 2097152 (2MB) - Maximum memory in bytes for rendered data. - min_rows_display : int, default 20 - Minimum number of rows to display. - repr_rows : int, default 10 - Default number of rows to display in repr output. - enable_cell_expansion : bool, default True + max_memory_bytes + Maximum memory in bytes for rendered data. Helps prevent performance + issues with large datasets. + min_rows + Minimum number of rows to display even if memory limit is reached. + Must not exceed ``max_rows``. + max_rows + Maximum number of rows to display. Takes precedence over memory limits + when fewer rows are requested. + repr_rows + Deprecated alias for ``max_rows``. Use ``max_rows`` instead. + enable_cell_expansion Whether to allow cells to expand when clicked. - custom_css : str, optional + custom_css Custom CSS to apply to the HTML table. - show_truncation_message : bool, default True + show_truncation_message Whether to show a message indicating that content has been truncated. - style_provider : StyleProvider, optional + style_provider Provider of CSS styles for the HTML table. If None, DefaultStyleProvider is used. - use_shared_styles : bool, default True - Whether to use shared styles across multiple tables. + use_shared_styles + Whether to use shared styles across multiple tables. This improves + performance when displaying many DataFrames in a single notebook. Raises: ------ ValueError If max_cell_length, max_width, max_height, max_memory_bytes, - min_rows_display, or repr_rows is not a positive integer. + min_rows or max_rows is not a positive integer, or if min_rows + exceeds max_rows. TypeError If enable_cell_expansion, show_truncation_message, or use_shared_styles is - not a boolean, - or if custom_css is provided but is not a string, - or if style_provider is provided but does not implement the StyleProvider + not a boolean, or if custom_css is provided but is not a string, or if + style_provider is provided but does not implement the StyleProvider protocol. """ - # Validate numeric parameters - _validate_positive_int(max_cell_length, "max_cell_length") - _validate_positive_int(max_width, "max_width") - _validate_positive_int(max_height, "max_height") - _validate_positive_int(max_memory_bytes, "max_memory_bytes") - _validate_positive_int(min_rows_display, "min_rows_display") - _validate_positive_int(repr_rows, "repr_rows") - - # Validate boolean parameters - _validate_bool(enable_cell_expansion, "enable_cell_expansion") - _validate_bool(show_truncation_message, "show_truncation_message") - _validate_bool(use_shared_styles, "use_shared_styles") - - # Validate custom_css - if custom_css is not None and not isinstance(custom_css, str): - msg = "custom_css must be None or a string" - raise TypeError(msg) - - # Validate style_provider - if style_provider is not None and not isinstance(style_provider, StyleProvider): - msg = "style_provider must implement the StyleProvider protocol" - raise TypeError(msg) + # Validate all parameters and get resolved max_rows + resolved_max_rows = _validate_formatter_parameters( + max_cell_length, + max_width, + max_height, + max_memory_bytes, + min_rows, + max_rows, + repr_rows, + enable_cell_expansion, + show_truncation_message, + use_shared_styles, + custom_css, + style_provider, + ) self.max_cell_length = max_cell_length self.max_width = max_width self.max_height = max_height self.max_memory_bytes = max_memory_bytes - self.min_rows_display = min_rows_display - self.repr_rows = repr_rows + self.min_rows = min_rows + self._max_rows = resolved_max_rows self.enable_cell_expansion = enable_cell_expansion self.custom_css = custom_css self.show_truncation_message = show_truncation_message @@ -231,6 +320,55 @@ def __init__( self._custom_cell_builder: Callable[[Any, int, int, str], str] | None = None self._custom_header_builder: Callable[[Any], str] | None = None + @property + def max_rows(self) -> int: + """Get the maximum number of rows to display. + + Returns: + The maximum number of rows to display in repr output + """ + return self._max_rows + + @max_rows.setter + def max_rows(self, value: int) -> None: + """Set the maximum number of rows to display. + + Args: + value: The maximum number of rows + """ + self._max_rows = value + + @property + def repr_rows(self) -> int: + """Get the maximum number of rows (deprecated name). + + .. deprecated:: + Use :attr:`max_rows` instead. This property is provided for + backward compatibility. + + Returns: + The maximum number of rows to display + """ + return self._max_rows + + @repr_rows.setter + def repr_rows(self, value: int) -> None: + """Set the maximum number of rows using deprecated name. + + .. deprecated:: + Use :attr:`max_rows` setter instead. This property is provided for + backward compatibility. + + Args: + value: The maximum number of rows + """ + warnings.warn( + "repr_rows is deprecated, use max_rows instead", + DeprecationWarning, + stacklevel=2, + ) + self._max_rows = value + def register_formatter(self, type_class: type, formatter: CellFormatter) -> None: """Register a custom formatter for a specific data type. @@ -659,7 +797,8 @@ def configure_formatter(**kwargs: Any) -> None: "max_width", "max_height", "max_memory_bytes", - "min_rows_display", + "min_rows", + "max_rows", "repr_rows", "enable_cell_expansion", "custom_css", diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py index 53a661969..71abe2925 100644 --- a/python/tests/test_dataframe.py +++ b/python/tests/test_dataframe.py @@ -91,6 +91,39 @@ def large_df(): return ctx.from_arrow(batch) +@pytest.fixture +def large_multi_batch_df(): + """Create a DataFrame with multiple record batches for testing stream behavior. + + This fixture creates 10 batches of 10,000 rows each (100,000 rows total), + ensuring the DataFrame spans multiple batches. This is essential for testing + that memory limits actually cause early stream termination rather than + truncating all collected data. + """ + ctx = SessionContext() + + # Create multiple batches, each with 10,000 rows + batches = [] + rows_per_batch = 10000 + num_batches = 10 + + for batch_idx in range(num_batches): + start_row = batch_idx * rows_per_batch + end_row = start_row + rows_per_batch + data = { + "a": list(range(start_row, end_row)), + "b": [f"s-{i}" for i in range(start_row, end_row)], + "c": [float(i + 0.1) for i in range(start_row, end_row)], + } + batch = pa.record_batch(data) + batches.append(batch) + + # Register as record batches to maintain multi-batch structure + # Using [batches] wraps list in another list as required by register_record_batches + ctx.register_record_batches("large_multi_batch_data", [batches]) + return ctx.table("large_multi_batch_data") + + @pytest.fixture def struct_df(): ctx = SessionContext() @@ -1438,7 +1471,7 @@ def get_header_style(self) -> str: def test_html_formatter_memory(df, clean_formatter_state): """Test the memory and row control parameters in DataFrameHtmlFormatter.""" - configure_formatter(max_memory_bytes=10, min_rows_display=1) + configure_formatter(max_memory_bytes=10, min_rows=1) html_output = df._repr_html_() # Count the number of table rows in the output @@ -1448,7 +1481,7 @@ def test_html_formatter_memory(df, clean_formatter_state): assert tr_count == 2 # 1 for header row, 1 for data row assert "data truncated" in html_output.lower() - configure_formatter(max_memory_bytes=10 * MB, min_rows_display=1) + configure_formatter(max_memory_bytes=10 * MB, min_rows=1) html_output = df._repr_html_() # With larger memory limit and min_rows=2, should display all rows tr_count = count_table_rows(html_output) @@ -1458,15 +1491,136 @@ def test_html_formatter_memory(df, clean_formatter_state): assert "data truncated" not in html_output.lower() -def test_html_formatter_repr_rows(df, clean_formatter_state): - configure_formatter(min_rows_display=2, repr_rows=2) +def test_html_formatter_memory_boundary_conditions(large_df, clean_formatter_state): + """Test memory limit behavior at boundary conditions with large dataset. + + This test validates that the formatter correctly handles edge cases when + the memory limit is reached with a large dataset (100,000 rows), ensuring + that min_rows constraint is properly respected while respecting memory limits. + Uses large_df to actually test memory limit behavior with realistic data sizes. + """ + + # Get the raw size of the data to test boundary conditions + # First, capture output with no limits + # NOTE: max_rows=200000 is set well above the dataset size (100k rows) to ensure + # we're testing memory limits, not row limits. Default max_rows=10 would + # truncate before memory limit is reached. + configure_formatter(max_memory_bytes=10 * MB, min_rows=1, max_rows=200000) + unrestricted_output = large_df._repr_html_() + unrestricted_rows = count_table_rows(unrestricted_output) + + # Test 1: Very small memory limit should still respect min_rows + # With large dataset, this should definitely hit memory limit before min_rows + configure_formatter(max_memory_bytes=10, min_rows=1) + html_output = large_df._repr_html_() + tr_count = count_table_rows(html_output) + assert tr_count >= 2 # At least header + 1 data row (minimum) + # Should show truncation since we limited memory so aggressively + assert "data truncated" in html_output.lower() + + # Test 2: Memory limit at default size (2MB) should truncate the large dataset + # Default max_rows would truncate at 10 rows, so we don't set it here to test + # that memory limit is respected even with default row limit + configure_formatter(max_memory_bytes=2 * MB, min_rows=1) + html_output = large_df._repr_html_() + tr_count = count_table_rows(html_output) + assert tr_count >= 2 # At least header + min_rows + # Should be truncated since full dataset is much larger than 2MB + assert tr_count < unrestricted_rows + + # Test 3: Very large memory limit should show much more data + # NOTE: max_rows=200000 is critical here - without it, default max_rows=10 + # would limit output to 10 rows even though we have 100MB of memory available + configure_formatter(max_memory_bytes=100 * MB, min_rows=1, max_rows=200000) + html_output = large_df._repr_html_() + tr_count = count_table_rows(html_output) + # Should show significantly more rows, possibly all + assert tr_count > 100 # Should show substantially more rows + + # Test 4: Min rows should override memory limit + # With tiny memory and larger min_rows, min_rows should win + configure_formatter(max_memory_bytes=10, min_rows=2) + html_output = large_df._repr_html_() + tr_count = count_table_rows(html_output) + assert tr_count >= 3 # At least header + 2 data rows (min_rows) + # Should show truncation message despite min_rows being satisfied + assert "data truncated" in html_output.lower() + + # Test 5: With reasonable memory and min_rows settings + # NOTE: max_rows=200000 ensures we test memory limit behavior, not row limit + configure_formatter(max_memory_bytes=2 * MB, min_rows=10, max_rows=200000) + html_output = large_df._repr_html_() + tr_count = count_table_rows(html_output) + assert tr_count >= 11 # header + at least 10 data rows (min_rows) + # Should be truncated due to memory limit + assert tr_count < unrestricted_rows + + +def test_html_formatter_stream_early_termination( + large_multi_batch_df, clean_formatter_state +): + """Test that memory limits cause early stream termination with multi-batch data. + + This test specifically validates that the formatter stops collecting data when + the memory limit is reached, rather than collecting all data and then truncating. + The large_multi_batch_df fixture creates 10 record batches, allowing us to verify + that not all batches are consumed when memory limit is hit. + + Key difference from test_html_formatter_memory_boundary_conditions: + - Uses multi-batch DataFrame to verify stream termination behavior + - Tests with memory limit exceeded by 2-3 batches but not 1 batch + - Verifies partial data + truncation message + respects min_rows + """ + + # Get baseline: how much data fits without memory limit + configure_formatter(max_memory_bytes=100 * MB, min_rows=1, max_rows=200000) + unrestricted_output = large_multi_batch_df._repr_html_() + unrestricted_rows = count_table_rows(unrestricted_output) + + # Test 1: Memory limit exceeded by ~2 batches (each batch ~10k rows) + # With 1 batch (~1-2MB), we should have space. With 2-3 batches, we exceed limit. + # Set limit to ~3MB to ensure we collect ~1 batch before hitting limit + configure_formatter(max_memory_bytes=3 * MB, min_rows=1, max_rows=200000) + html_output = large_multi_batch_df._repr_html_() + tr_count = count_table_rows(html_output) + + # Should show significant truncation (not all 100k rows) + assert tr_count < unrestricted_rows, "Should be truncated by memory limit" + assert tr_count >= 2, "Should respect min_rows" + assert "data truncated" in html_output.lower(), "Should indicate truncation" + + # Test 2: Very tight memory limit should still respect min_rows + # Even with tiny memory (10 bytes), should show at least min_rows + configure_formatter(max_memory_bytes=10, min_rows=5, max_rows=200000) + html_output = large_multi_batch_df._repr_html_() + tr_count = count_table_rows(html_output) + + assert tr_count >= 6, "Should show header + at least min_rows (5)" + assert "data truncated" in html_output.lower(), "Should indicate truncation" + + # Test 3: Memory limit should take precedence over max_rows in early termination + # With max_rows=100 but small memory limit, should terminate early due to memory + configure_formatter(max_memory_bytes=2 * MB, min_rows=1, max_rows=100) + html_output = large_multi_batch_df._repr_html_() + tr_count = count_table_rows(html_output) + + # Should be truncated by memory limit (showing more than max_rows would suggest + # but less than unrestricted) + assert tr_count >= 2, "Should respect min_rows" + assert tr_count < unrestricted_rows, "Should be truncated" + # Output should indicate why truncation occurred + assert "data truncated" in html_output.lower() + + +def test_html_formatter_max_rows(df, clean_formatter_state): + configure_formatter(min_rows=2, max_rows=2) html_output = df._repr_html_() tr_count = count_table_rows(html_output) # Table should have header row (1) + 2 data rows = 3 rows assert tr_count == 3 - configure_formatter(min_rows_display=2, repr_rows=3) + configure_formatter(min_rows=2, max_rows=3) html_output = df._repr_html_() tr_count = count_table_rows(html_output) @@ -1492,17 +1646,42 @@ def test_html_formatter_validation(): with pytest.raises(ValueError, match="max_memory_bytes must be a positive integer"): DataFrameHtmlFormatter(max_memory_bytes=-100) - with pytest.raises(ValueError, match="min_rows_display must be a positive integer"): - DataFrameHtmlFormatter(min_rows_display=0) + with pytest.raises(ValueError, match="min_rows must be a positive integer"): + DataFrameHtmlFormatter(min_rows=0) + + with pytest.raises(ValueError, match="min_rows must be a positive integer"): + DataFrameHtmlFormatter(min_rows=-5) - with pytest.raises(ValueError, match="min_rows_display must be a positive integer"): - DataFrameHtmlFormatter(min_rows_display=-5) + with pytest.raises(ValueError, match="max_rows must be a positive integer"): + DataFrameHtmlFormatter(max_rows=0) + + with pytest.raises(ValueError, match="max_rows must be a positive integer"): + DataFrameHtmlFormatter(max_rows=-10) + + with pytest.raises( + ValueError, match="min_rows must be less than or equal to max_rows" + ): + DataFrameHtmlFormatter(min_rows=5, max_rows=4) - with pytest.raises(ValueError, match="repr_rows must be a positive integer"): - DataFrameHtmlFormatter(repr_rows=0) - with pytest.raises(ValueError, match="repr_rows must be a positive integer"): - DataFrameHtmlFormatter(repr_rows=-10) +def test_repr_rows_backward_compatibility(clean_formatter_state): + """Test that repr_rows parameter still works as deprecated alias.""" + # Should work when not conflicting with max_rows + with pytest.warns(DeprecationWarning, match="repr_rows parameter is deprecated"): + formatter = DataFrameHtmlFormatter(repr_rows=15, min_rows=10) + assert formatter.max_rows == 15 + assert formatter.repr_rows == 15 + + # Should fail when conflicting with max_rows + with pytest.raises(ValueError, match="Cannot specify both repr_rows and max_rows"): + DataFrameHtmlFormatter(repr_rows=5, max_rows=10) + + # Setting repr_rows via property should warn + formatter2 = DataFrameHtmlFormatter() + with pytest.warns(DeprecationWarning, match="repr_rows is deprecated"): + formatter2.repr_rows = 7 + assert formatter2.max_rows == 7 + assert formatter2.repr_rows == 7 def test_configure_formatter(df, clean_formatter_state): @@ -1514,8 +1693,8 @@ def test_configure_formatter(df, clean_formatter_state): max_width = 500 max_height = 30 max_memory_bytes = 3 * MB - min_rows_display = 2 - repr_rows = 2 + min_rows = 2 + max_rows = 2 enable_cell_expansion = False show_truncation_message = False use_shared_styles = False @@ -1527,8 +1706,8 @@ def test_configure_formatter(df, clean_formatter_state): assert formatter_default.max_width != max_width assert formatter_default.max_height != max_height assert formatter_default.max_memory_bytes != max_memory_bytes - assert formatter_default.min_rows_display != min_rows_display - assert formatter_default.repr_rows != repr_rows + assert formatter_default.min_rows != min_rows + assert formatter_default.max_rows != max_rows assert formatter_default.enable_cell_expansion != enable_cell_expansion assert formatter_default.show_truncation_message != show_truncation_message assert formatter_default.use_shared_styles != use_shared_styles @@ -1539,8 +1718,8 @@ def test_configure_formatter(df, clean_formatter_state): max_width=max_width, max_height=max_height, max_memory_bytes=max_memory_bytes, - min_rows_display=min_rows_display, - repr_rows=repr_rows, + min_rows=min_rows, + max_rows=max_rows, enable_cell_expansion=enable_cell_expansion, show_truncation_message=show_truncation_message, use_shared_styles=use_shared_styles, @@ -1550,8 +1729,8 @@ def test_configure_formatter(df, clean_formatter_state): assert formatter_custom.max_width == max_width assert formatter_custom.max_height == max_height assert formatter_custom.max_memory_bytes == max_memory_bytes - assert formatter_custom.min_rows_display == min_rows_display - assert formatter_custom.repr_rows == repr_rows + assert formatter_custom.min_rows == min_rows + assert formatter_custom.max_rows == max_rows assert formatter_custom.enable_cell_expansion == enable_cell_expansion assert formatter_custom.show_truncation_message == show_truncation_message assert formatter_custom.use_shared_styles == use_shared_styles @@ -2955,6 +3134,47 @@ def test_html_formatter_manual_format_html(clean_formatter_state): assert "