diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..d7e0ee2 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Keep Cargo's workspace output out of `target/` so `mvn clean` (which deletes +# the root `target/`) does not nuke the Rust build cache. 
+[build] +target-dir = "rust-target" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c5db936..da8e65a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -83,8 +83,8 @@ jobs: path: | ~/.cargo/registry ~/.cargo/git - native/target - key: ${{ runner.os }}-cargo-${{ hashFiles('native/Cargo.lock') }} + rust-target + key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }} restore-keys: ${{ runner.os }}-cargo- - name: Build native and run tests diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 4cf628f..952bf34 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -54,7 +54,7 @@ jobs: run: ./mvnw -q spotless:check - name: Check Rust formatting - run: cd native && cargo fmt --all -- --check + run: cargo fmt --all -- --check clippy: name: Clippy @@ -81,9 +81,9 @@ jobs: path: | ~/.cargo/registry ~/.cargo/git - native/target - key: ${{ runner.os }}-clippy-${{ hashFiles('native/Cargo.lock') }} + rust-target + key: ${{ runner.os }}-clippy-${{ hashFiles('Cargo.lock') }} restore-keys: ${{ runner.os }}-clippy- - name: Run clippy - run: cd native && cargo clippy --all-targets -- -D warnings + run: cargo clippy --workspace --all-targets -- -D warnings diff --git a/.gitignore b/.gitignore index 719a2a4..25c9216 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ target/ +rust-target/ *.class .idea/ .vscode/ diff --git a/native/Cargo.lock b/Cargo.lock similarity index 94% rename from native/Cargo.lock rename to Cargo.lock index 96d2f9d..41d022d 100644 --- a/native/Cargo.lock +++ b/Cargo.lock @@ -98,9 +98,9 @@ dependencies = [ [[package]] name = "ar_archive_writer" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +checksum = "4087686b4b0a3427190bae57a1d9a478dbb2d40c5dc1bd6e2b6d797913bdd348" dependencies = [ "object", ] @@ -119,9 +119,9 @@ checksum 
= "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "607e64bb911ee4f90483e044fe78f175989148c2892e659a2cd25429e782ec54" +checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" dependencies = [ "arrow-arith", "arrow-array", @@ -140,9 +140,9 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e754319ed8a85d817fe7adf183227e0b5308b82790a737b426c1124626b48118" +checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" dependencies = [ "arrow-array", "arrow-buffer", @@ -154,9 +154,9 @@ dependencies = [ [[package]] name = "arrow-array" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841321891f247aa86c6112c80d83d89cb36e0addd020fa2425085b8eb6c3f579" +checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" dependencies = [ "ahash", "arrow-buffer", @@ -173,9 +173,9 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f955dfb73fae000425f49c8226d2044dab60fb7ad4af1e24f961756354d996c9" +checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" dependencies = [ "bytes", "half", @@ -185,9 +185,9 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca5e686972523798f76bef355145bc1ae25a84c731e650268d31ab763c701663" +checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" dependencies = [ "arrow-array", "arrow-buffer", @@ -207,9 +207,9 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "58.2.0" 
+version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86c276756867fc8186ec380c72c290e6e3b23a1d4fb05df6b1d62d2e62666d48" +checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de" dependencies = [ "arrow-array", "arrow-cast", @@ -222,9 +222,9 @@ dependencies = [ [[package]] name = "arrow-data" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db3b5846209775b6dc8056d77ff9a032b27043383dd5488abd0b663e265b9373" +checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" dependencies = [ "arrow-buffer", "arrow-schema", @@ -235,9 +235,9 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd8907ddd8f9fbabf91ec2c85c1d81fe2874e336d2443eb36373595e28b98dd5" +checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" dependencies = [ "arrow-array", "arrow-buffer", @@ -251,9 +251,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4518c59acc501f10d7dcae397fe12b8db3d81bc7de94456f8a58f9165d6f502" +checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b" dependencies = [ "arrow-array", "arrow-buffer", @@ -276,9 +276,9 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efa70d9d6b1356f1fb9f1f651b84a725b7e0abb93f188cf7d31f14abfa2f2e6f" +checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" dependencies = [ "arrow-array", "arrow-buffer", @@ -289,9 +289,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"faec88a945338192beffbbd4be0def70135422930caa244ac3cec0cd213b26b4" +checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" dependencies = [ "arrow-array", "arrow-buffer", @@ -302,9 +302,9 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18aa020f6bc8e5201dcd2d4b7f98c68f8a410ef37128263243e6ff2a47a67d4f" +checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" dependencies = [ "bitflags", "serde_core", @@ -313,9 +313,9 @@ dependencies = [ [[package]] name = "arrow-select" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a657ab5132e9c8ca3b24eb15a823d0ced38017fe3930ff50167466b02e2d592c" +checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" dependencies = [ "ahash", "arrow-array", @@ -327,9 +327,9 @@ dependencies = [ [[package]] name = "arrow-string" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6de2efbbd1a9f9780ceb8d1ff5d20421b35863b361e3386b4f571f1fc69fcb8" +checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" dependencies = [ "arrow-array", "arrow-buffer", @@ -393,9 +393,9 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" [[package]] name = "base64" @@ -419,9 +419,9 @@ dependencies = [ [[package]] name = "bitflags" -version = "2.11.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" 
+checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" [[package]] name = "blake2" @@ -457,9 +457,9 @@ dependencies = [ [[package]] name = "bon" -version = "3.9.1" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe" +checksum = "b2f04f6fef12d70d42a77b1433c9e0f065238479a6cefc4f5bab105e9873a3c3" dependencies = [ "bon-macros", "rustversion", @@ -467,9 +467,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.9.1" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c" +checksum = "7d0bd4c2f75335ad98052a37efb54f428b492f64340257143b3429c8a508fa7b" dependencies = [ "darling", "ident_case", @@ -482,9 +482,9 @@ dependencies = [ [[package]] name = "brotli" -version = "8.0.2" +version = "8.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +checksum = "8119e4516436f5708bbc474a9d395bf12f1b5395e93a92a56e647ac3388c8610" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -493,9 +493,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "5.0.0" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +checksum = "5962523e1b92ce1b5e793d9169b9943eece10d39f62550bc04bb605d75b94924" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -503,9 +503,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.20.2" +version = "3.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649" [[package]] name = "byteorder" @@ -530,9 +530,9 @@ 
dependencies = [ [[package]] name = "cc" -version = "1.2.62" +version = "1.2.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" dependencies = [ "find-msvc-tools", "jobserver", @@ -571,9 +571,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.44" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ "iana-time-zone", "num-traits", @@ -789,9 +789,9 @@ dependencies = [ [[package]] name = "dashmap" -version = "6.1.0" +version = "6.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1306,6 +1306,7 @@ dependencies = [ "arrow", "async-trait", "datafusion", + "datafusion-jni-common", "datafusion-proto", "datafusion-spark", "datafusion-substrait", @@ -1320,6 +1321,16 @@ dependencies = [ "url", ] +[[package]] +name = "datafusion-jni-common" +version = "0.1.0" +dependencies = [ + "datafusion", + "futures", + "jni", + "tokio", +] + [[package]] name = "datafusion-macros" version = "53.1.0" @@ -1514,6 +1525,31 @@ dependencies = [ "log", ] +[[package]] +name = "datafusion-scan-ffi" +version = "0.1.0" +dependencies = [ + "arrow", + "datafusion", + "datafusion-proto", + "datafusion-scan-ffi", + "futures", + "prost", + "prost-build", + "protoc-bin-vendored", + "tokio", +] + +[[package]] +name = "datafusion-scan-jni" +version = "0.1.0" +dependencies = [ + "arrow", + "datafusion-scan-ffi", + "jni", + "prost", +] + [[package]] name = "datafusion-session" version = "53.1.0" 
@@ -1607,9 +1643,9 @@ dependencies = [ [[package]] name = "displaydoc" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f" dependencies = [ "proc-macro2", "quote", @@ -1624,9 +1660,9 @@ checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "either" -version = "1.15.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" [[package]] name = "equivalent" @@ -1932,9 +1968,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "http" -version = "1.4.0" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425" dependencies = [ "bytes", "itoa", @@ -1977,9 +2013,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" [[package]] name = "hyper" -version = "1.9.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" +checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498" dependencies = [ "atomic-waker", "bytes", @@ -2269,13 +2305,12 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.98" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" +checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162" dependencies = [ 
"cfg-if", "futures-util", - "once_cell", "wasm-bindgen", ] @@ -2344,9 +2379,9 @@ dependencies = [ [[package]] name = "libbz2-rs-sys" -version = "0.2.3" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" +checksum = "34b357333733e8260735ba5894eb928c02ecc69c78715f01a8019e7fa7f2db4c" [[package]] name = "libc" @@ -2403,9 +2438,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.29" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "lru-slab" @@ -2434,9 +2469,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" [[package]] name = "miniz_oxide" @@ -2450,9 +2485,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" +checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "wasi", @@ -2598,9 +2633,9 @@ dependencies = [ [[package]] name = "parquet" -version = "58.2.0" +version = "58.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d7efd3052f7d6ef601085559a246bc991e9a8cc77e02753737df6322ce35f1" +checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908" dependencies = [ "ahash", "arrow-array", @@ -2762,9 +2797,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.14.3" +version = "0.14.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +checksum = "528ac67416ff8646872a3c02cad9cc4ee5dc9f9540c9b10771855c95cb2e5ae1" dependencies = [ "bytes", "prost-derive", @@ -2772,9 +2807,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" +checksum = "03da047801ff44bb6a4d407d4860c05fd70bb81714e6b2f3812603d5b145b042" dependencies = [ "heck", "itertools", @@ -2791,9 +2826,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +checksum = "b570b25f7617e43d59005d0990ccb79e950a423952cea19671b7a876da390adf" dependencies = [ "anyhow", "itertools", @@ -2804,9 +2839,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.14.3" +version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" +checksum = "f94967dc7688f3054c7fac87473ffae4cc4c3904800e2d9f5b857246d8963b0a" dependencies = [ "prost", ] @@ -3063,9 +3098,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.3" +version = "1.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" dependencies = [ "aho-corasick", "memchr", @@ -3092,9 +3127,9 @@ checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973" [[package]] name = "regex-syntax" -version = "0.8.10" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" [[package]] name = "regress" @@ -3206,9 +3241,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" dependencies = [ "openssl-probe", "rustls-pki-types", @@ -3389,9 +3424,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.149" +version = "1.0.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" dependencies = [ "itoa", "memchr", @@ -3461,9 +3496,9 @@ dependencies = [ [[package]] name = "shlex" -version = "1.3.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" [[package]] name = "simd-adler32" @@ -3503,9 +3538,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.61.2", @@ -3900,9 +3935,9 @@ checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" [[package]] name = "typenum" -version = "1.20.0" +version = "1.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" +checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20" [[package]] name = "typify" @@ -3959,9 +3994,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-segmentation" -version = "1.13.2" +version = "1.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" +checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8" [[package]] name = "unicode-width" @@ -4007,9 +4042,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "uuid" -version = "1.23.1" +version = "1.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" dependencies = [ "getrandom 0.4.2", "js-sys", @@ -4068,9 +4103,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.121" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" +checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563" dependencies = [ "cfg-if", "once_cell", @@ -4081,9 +4116,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.71" +version = "0.4.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" +checksum = "54568702fabf5d4849ce2b90fadfa64168a097eaf4b351ce9df8b687a0086aaf" dependencies = [ "js-sys", "wasm-bindgen", @@ -4091,9 +4126,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.121" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" +checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4101,9 +4136,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.121" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" +checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b" dependencies = [ "bumpalo", "proc-macro2", @@ -4114,9 +4149,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.121" +version = "0.2.123" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" +checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92" dependencies = [ "unicode-ident", ] @@ -4170,9 +4205,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.98" +version = "0.3.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" +checksum = "6e0871acf327f283dc6da28a1696cdc64fb355ba9f935d052021fa77f35cce69" dependencies = [ "js-sys", "wasm-bindgen", @@ -4580,9 +4615,9 @@ checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "yoke" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5" dependencies = [ "stable_deref_trait", "yoke-derive", @@ -4603,18 +4638,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.48" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.48" +version = "0.8.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930" dependencies = [ "proc-macro2", "quote", @@ -4623,9 +4658,9 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d32ba1d --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[workspace] +resolver = "2" +members = [ + "native", + "native-common", + "native-ffi", + "native-jni", +] + +# Shared package metadata so every crate moves in lock step. 
Members inherit +# via `version.workspace = true` / `edition.workspace = true` etc.; a single +# bump here re-versions the whole workspace. +[workspace.package] +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +repository = "https://github.com/apache/datafusion-java" + +# Every dependency used by any workspace member is declared here so version +# bumps live in one place and the resolver picks a single version of each +# crate across the workspace. Members reference these via `{ workspace = true }` +# and add per-crate flags (optional, features, default-features) at the use +# site. +[workspace.dependencies] +arrow = { version = "58", features = ["ffi"] } +async-trait = "0.1" +datafusion = { version = "53.1.0" } +datafusion-proto = "53.1.0" +datafusion-spark = "53.1.0" +datafusion-substrait = "53.1.0" +futures = "0.3" +jni = "0.21" +# Pinned to the major DataFusion 53.1 pulls in transitively (0.13.x) so we +# share the same `dyn ObjectStore` vtable and don't double-link. +object_store = { version = "0.13", default-features = false } +prost = "0.14" +prost-build = "0.14" +protoc-bin-vendored = "3" +tokio = { version = "1", features = ["rt-multi-thread"] } +# Optional, cfg-gated. See `native/Cargo.toml` for the build-flag dance. +tokio-metrics = "0.5" +url = "2" diff --git a/Makefile b/Makefile index 6d9b0ae..d6bcf2c 100644 --- a/Makefile +++ b/Makefile @@ -20,14 +20,14 @@ all: native jvm native: - cd native && cargo build + cargo build --workspace -# Build the native crate with the `runtime-metrics` Cargo feature enabled. +# Build the JNI crate with the `runtime-metrics` Cargo feature enabled. # Requires `--cfg tokio_unstable` because tokio-metrics gates its API there. # Default `make native` does not pull this in; callers who need # SessionContext.runtimeStats() pick this target explicitly. 
native-runtime-metrics: - cd native && RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics + RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics jvm: ./mvnw package -DskipTests @@ -39,10 +39,10 @@ test: native # `:check` form inline in .github/workflows/lint.yml. format: ./mvnw -q spotless:apply - cd native && cargo fmt --all + cargo fmt --all clean: - cd native && cargo clean + cargo clean ./mvnw clean tpch-data: diff --git a/core/pom.xml b/core/pom.xml index 5ddf107..e589b16 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -88,7 +88,9 @@ under the License. org.apache.maven.plugins maven-surefire-plugin - --add-opens=java.base/java.nio=ALL-UNNAMED + + --add-opens=java.base/java.nio=ALL-UNNAMED -Djava.library.path=${maven.multiModuleProjectDirectory}/rust-target/${datafusion.native.profile} @@ -102,8 +104,8 @@ under the License. - + value="${maven.multiModuleProjectDirectory}/rust-target/${datafusion.native.profile}/${datafusion.lib.filename}"/> + diff --git a/core/src/main/java/org/apache/datafusion/SessionContext.java b/core/src/main/java/org/apache/datafusion/SessionContext.java index ec0bd85..b68cda5 100644 --- a/core/src/main/java/org/apache/datafusion/SessionContext.java +++ b/core/src/main/java/org/apache/datafusion/SessionContext.java @@ -113,10 +113,11 @@ public DataFrame fromProto(byte[] planBytes) { * other Substrait-emitting tool — and hand them to DataFusion without round-tripping through SQL. * *

Substrait support is gated behind the {@code substrait} Cargo feature on the native crate - * and is off by default. Rebuild the native crate with {@code cargo build - * --features substrait} (or {@code cargo build --features substrait,protoc} for hermetic builds - * that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native binary - * built without the feature, this method throws {@link RuntimeException} pointing at the flag. + * and is off by default. Rebuild the native crate with {@code cargo build -p + * datafusion-jni --features substrait} (or {@code ... --features substrait,protoc} for hermetic + * builds that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native + * binary built without the feature, this method throws {@link RuntimeException} pointing at the + * flag. * * @throws IllegalArgumentException if {@code planBytes} is {@code null}. * @throws IllegalStateException if this context is closed. @@ -183,7 +184,7 @@ public MemoryUsage memoryUsage() { * Rebuild with: * *

{@code
-   * RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics
+   * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
    * }
* *

If invoked against a native binary built without the feature, this method throws {@link diff --git a/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java b/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java new file mode 100644 index 0000000..6a2d43b --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.scan; + +import org.apache.arrow.c.ArrowArrayStream; +import org.apache.arrow.c.ArrowSchema; +import org.apache.arrow.c.CDataDictionaryProvider; +import org.apache.arrow.c.Data; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; + +/** + * A planned scan over a DataFusion {@code TableProvider}, driven through the plain-C scan ABI. + * + *

This is the JVM-facing wrapper over {@link NativeScan}. Each scanned partition is returned as + * an {@link ArrowReader} imported from a native {@code FFI_ArrowArrayStream} through the Arrow C + * Stream interface, so record batches never pass through JNI -- they cross via the Arrow C Data + * interface that arrow-java already speaks. This mirrors {@code DataFrame#collect}. + * + *

The provider and its parameters are supplied as a serialized {@code ScanConfig}; pushed-down + * projection/filters/tuning as a serialized {@code ScanRequest}. Both are built with the generated + * protobuf classes in {@code org.apache.datafusion.protobuf}. + * + *

Not thread-safe with respect to {@link #close()}: callers must not close a scan while a + * partition execute is in flight on another thread. + */ +public final class DatafusionScan implements AutoCloseable { + + private final long handle; + private boolean closed; + + private DatafusionScan(long handle) { + this.handle = handle; + } + + /** + * Probe a provider's output schema without planning a scan. + * + * @param allocator allocator for the transient C schema struct + * @param provider registered builder name (e.g. {@code datafusion.listing}) + * @param config serialized {@code ScanConfig} + */ + public static Schema schema(BufferAllocator allocator, String provider, byte[] config) { + ArrowSchema cSchema = ArrowSchema.allocateNew(allocator); + CDataDictionaryProvider dictionaries = new CDataDictionaryProvider(); + NativeScan.providerSchema(provider, config, cSchema.memoryAddress()); + // importField takes ownership of the C struct and returns the struct-typed + // root; its children are the table's columns. + Field root = Data.importField(allocator, cSchema, dictionaries); + return new Schema(root.getChildren()); + } + + /** + * Plan a scan over {@code provider}. + * + * @param provider registered builder name + * @param config serialized {@code ScanConfig} + * @param scanRequest serialized {@code ScanRequest}, or {@code null}/empty for no pushdown + */ + public static DatafusionScan create(String provider, byte[] config, byte[] scanRequest) { + byte[] request = scanRequest == null ? new byte[0] : scanRequest; + return new DatafusionScan(NativeScan.createScan(provider, config, request)); + } + + /** Number of output partitions this scan produces. */ + public int partitionCount() { + return NativeScan.partitionCount(handle); + } + + /** + * Execute one partition. The returned {@link ArrowReader} owns the underlying stream; close it + * when done. Safe to call concurrently for distinct partitions. 
+ */ + public ArrowReader executePartition(BufferAllocator allocator, int partition) { + ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator); + NativeScan.executeStreamPartition(handle, partition, stream.memoryAddress()); + return Data.importArrayStream(allocator, stream); + } + + /** Execute the whole plan as a single coalesced reader. */ + public ArrowReader execute(BufferAllocator allocator) { + ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator); + NativeScan.executeStream(handle, stream.memoryAddress()); + return Data.importArrayStream(allocator, stream); + } + + @Override + public synchronized void close() { + if (closed) { + return; + } + closed = true; + NativeScan.closeScan(handle); + } +} diff --git a/core/src/main/java/org/apache/datafusion/scan/NativeScan.java b/core/src/main/java/org/apache/datafusion/scan/NativeScan.java new file mode 100644 index 0000000..31093d4 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/scan/NativeScan.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.scan; + +/** + * Raw native bindings to the {@code datafusion_scan_jni} shim. + * + *

Every method is a thin pass-through to the in-process scan core. Arrow data is never marshaled + * across this boundary: the {@code *Addr} arguments are the memory addresses of {@code + * org.apache.arrow.c.ArrowSchema} / {@code ArrowArrayStream} structs allocated by arrow-java, which + * the native side fills in place. Callers should use {@link DatafusionScan} rather than these + * directly. + */ +final class NativeScan { + + static { + ScanNativeLoader.load(); + } + + private NativeScan() {} + + /** Probe a provider's output schema into the {@code ArrowSchema} at {@code schemaAddr}. */ + static native void providerSchema(String provider, byte[] config, long schemaAddr); + + /** + * Plan a scan. Returns an opaque handle; release it with {@link #closeScan(long)}. + * + * @param provider registered builder name (e.g. {@code datafusion.listing}) + * @param config serialized {@code ScanConfig} + * @param scanRequest serialized {@code ScanRequest} (pushdown), or empty for none + */ + static native long createScan(String provider, byte[] config, byte[] scanRequest); + + /** Output partition count of a planned scan. */ + static native int partitionCount(long handle); + + /** Execute one partition into the {@code ArrowArrayStream} at {@code streamAddr}. */ + static native void executeStreamPartition(long handle, int partition, long streamAddr); + + /** Execute the whole plan as one coalesced stream into {@code streamAddr}. */ + static native void executeStream(long handle, long streamAddr); + + /** Drop a planned scan. Null-safe. 
*/ + static native void closeScan(long handle); +} diff --git a/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java b/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java new file mode 100644 index 0000000..6540ce4 --- /dev/null +++ b/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.scan; + +/** + * Loads the {@code datafusion_scan_jni} shim library. + * + *

This is the JVM adapter over the plain-C scan ABI exported by {@code + * datafusion-scan-ffi}. The library is loaded from {@code java.library.path} (set it with {@code + * -Djava.library.path=...} or the platform library-path environment variable so it can find the + * built {@code libdatafusion_scan_jni}). Classpath bundling, as the core {@code datafusion_jni} + * library does, is left to release packaging. + */ +final class ScanNativeLoader { + + private static final String LIBRARY_NAME = "datafusion_scan_jni"; + + private static volatile boolean loaded; + + private ScanNativeLoader() {} + + static synchronized void load() { + if (loaded) { + return; + } + System.loadLibrary(LIBRARY_NAME); + loaded = true; + } +} diff --git a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java index 120d179..d567275 100644 --- a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java +++ b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java @@ -37,7 +37,7 @@ * #checkFeatureEnabled}. Run * *

{@code
- * (cd native && RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics)
+ * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
  * }
* * before {@code ./mvnw test} to exercise this class. diff --git a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java index 34db3b5..a2cfb0a 100644 --- a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java +++ b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java @@ -50,7 +50,7 @@ * *

The {@code substrait} Cargo feature is off by default in {@code native/Cargo.toml}; if the * native crate was built without it, every test here is skipped (see {@link #checkFeatureEnabled}). - * Run {@code (cd native && cargo build --features substrait)} before {@code ./mvnw test} to + * Run {@code cargo build -p datafusion-jni --features substrait} before {@code ./mvnw test} to * exercise this class. */ class SessionContextSubstraitTest { diff --git a/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java b/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java new file mode 100644 index 0000000..2cf61f7 --- /dev/null +++ b/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datafusion.scan; + +import static java.util.stream.Collectors.toList; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import com.google.protobuf.ByteString; +import datafusion_common.DatafusionCommon.Column; +import datafusion_common.DatafusionCommon.ScalarValue; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.datafusion.protobuf.BinaryExprNode; +import org.apache.datafusion.protobuf.CsvReadOptionsProto; +import org.apache.datafusion.protobuf.ListingSource; +import org.apache.datafusion.protobuf.LogicalExprNode; +import org.apache.datafusion.protobuf.ScanConfig; +import org.apache.datafusion.protobuf.ScanRequest; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * End-to-end exercise of the JNI shim: drive the {@code datafusion.listing} provider over a CSV + * entirely from Java, confirming the Arrow C Stream produced by arrow-rs imports cleanly through + * arrow-java's {@code Data.importArrayStream}. This is the proof that the C Stream ABI matches + * across the two Arrow implementations through this path. + */ +class DatafusionScanTest { + + private static final String PROVIDER = "datafusion.listing"; + + @TempDir Path tmp; + + /** Build a ScanConfig for a CSV listing source, using the generated protobuf builders. 
*/ + private byte[] csvConfig(String path) { + return ScanConfig.newBuilder() + .setProvider(PROVIDER) + .setListing( + ListingSource.newBuilder() + .addPaths(path) + .setCsv( + CsvReadOptionsProto.newBuilder() + .setHasHeader(true) + .setDelimiter(',') + .setQuote('"') + .setFileExtension(".csv") + .build()) + .build()) + .build() + .toByteArray(); + } + + @Test + void inferredSchemaMatchesCsvHeader() throws Exception { + Path csv = tmp.resolve("data.csv"); + Files.writeString(csv, "id,name\n1,a\n2,b\n3,c\n"); + byte[] config = csvConfig(csv.toString()); + + try (BufferAllocator allocator = new RootAllocator()) { + Schema schema = DatafusionScan.schema(allocator, PROVIDER, config); + List names = schema.getFields().stream().map(Field::getName).collect(toList()); + assertEquals(List.of("id", "name"), names); + } + } + + @Test + void scansCsvRowsThroughArrowCStream() throws Exception { + Path csv = tmp.resolve("data.csv"); + Files.writeString(csv, "id,name\n1,a\n2,b\n3,c\n"); + byte[] config = csvConfig(csv.toString()); + + try (BufferAllocator allocator = new RootAllocator(); + DatafusionScan scan = DatafusionScan.create(PROVIDER, config, null)) { + assertTrue(scan.partitionCount() >= 1, "expected at least one partition"); + + long total = 0; + int rows = 0; + try (ArrowReader reader = scan.execute(allocator)) { + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + while (reader.loadNextBatch()) { + rows += root.getRowCount(); + BigIntVector ids = (BigIntVector) root.getVector("id"); + for (int i = 0; i < root.getRowCount(); i++) { + total += ids.get(i); + } + } + } + assertEquals(3, rows); + assertEquals(1 + 2 + 3, total); + } + } + + @Test + void projectionPrunesColumns() throws Exception { + byte[] config = csvConfig(writeCsv()); + // Pushed projection: keep only "name". 
+ byte[] request = ScanRequest.newBuilder().addProjection("name").build().toByteArray(); + + try (BufferAllocator allocator = new RootAllocator(); + DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) { + int rows = 0; + try (ArrowReader reader = scan.execute(allocator)) { + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + List cols = + root.getSchema().getFields().stream().map(Field::getName).collect(toList()); + assertEquals(List.of("name"), cols, "projection should drop the id column"); + while (reader.loadNextBatch()) { + rows += root.getRowCount(); + } + } + assertEquals(3, rows); + } + } + + @Test + void filterPushdownSelectsRows() throws Exception { + byte[] config = csvConfig(writeCsv()); + // Pushed filter: id >= 2. + byte[] request = + ScanRequest.newBuilder().addFilters(ByteString.copyFrom(idAtLeast(2))).build().toByteArray(); + + try (BufferAllocator allocator = new RootAllocator(); + DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) { + long total = 0; + int rows = 0; + try (ArrowReader reader = scan.execute(allocator)) { + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + while (reader.loadNextBatch()) { + rows += root.getRowCount(); + BigIntVector ids = (BigIntVector) root.getVector("id"); + for (int i = 0; i < root.getRowCount(); i++) { + total += ids.get(i); + } + } + } + assertEquals(2, rows, "only id 2 and 3 pass the filter"); + assertEquals(2 + 3, total); + } + } + + @Test + void limitCapsRows() throws Exception { + byte[] config = csvConfig(writeCsv()); + // Pushed limit of 2 over the 3-row CSV. 
+ byte[] request = ScanRequest.newBuilder().setLimit(2).build().toByteArray(); + + try (BufferAllocator allocator = new RootAllocator(); + DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) { + int rows = 0; + try (ArrowReader reader = scan.execute(allocator)) { + VectorSchemaRoot root = reader.getVectorSchemaRoot(); + while (reader.loadNextBatch()) { + rows += root.getRowCount(); + } + } + assertEquals(2, rows, "limit should cap the scan at 2 rows"); + } + } + + /** Serialize the LogicalExprNode for {@code id >= value}, as the engine's filter pushdown would. */ + private static byte[] idAtLeast(long value) { + LogicalExprNode column = + LogicalExprNode.newBuilder().setColumn(Column.newBuilder().setName("id")).build(); + LogicalExprNode literal = + LogicalExprNode.newBuilder() + .setLiteral(ScalarValue.newBuilder().setInt64Value(value)) + .build(); + return LogicalExprNode.newBuilder() + .setBinaryExpr( + BinaryExprNode.newBuilder().addOperands(column).addOperands(literal).setOp("GtEq")) + .build() + .toByteArray(); + } + + private String writeCsv() throws Exception { + Path csv = tmp.resolve("data.csv"); + Files.writeString(csv, "id,name\n1,a\n2,b\n3,c\n"); + return csv.toString(); + } +} diff --git a/dev/release/build-release.sh b/dev/release/build-release.sh index 2b033bb..4d4ab13 100755 --- a/dev/release/build-release.sh +++ b/dev/release/build-release.sh @@ -135,26 +135,28 @@ JVM_TARGET_DIR="$PROJECT_HOME/core/target/classes/org/apache/datafusion" mkdir -p "$JVM_TARGET_DIR/linux/amd64" docker cp \ - "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \ + "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \ "$JVM_TARGET_DIR/linux/amd64/" mkdir -p "$JVM_TARGET_DIR/linux/aarch64" docker cp \ - "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \ + 
"$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \ "$JVM_TARGET_DIR/linux/aarch64/" echo "Building macOS native libs on the host (host=$HOST_ARCH)" rustup target add "$OTHER_DARWIN_TARGET" -(cd "$PROJECT_HOME/native" && cargo build --release) -(cd "$PROJECT_HOME/native" && cargo build --release --target "$OTHER_DARWIN_TARGET") +# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml), +# not the per-crate `native/target/`, so build from the repo root. +(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni) +(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni --target "$OTHER_DARWIN_TARGET") mkdir -p "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR" -cp "$PROJECT_HOME/native/target/release/libdatafusion_jni.dylib" \ +cp "$PROJECT_HOME/rust-target/release/libdatafusion_jni.dylib" \ "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR/" mkdir -p "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR" -cp "$PROJECT_HOME/native/target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \ +cp "$PROJECT_HOME/rust-target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \ "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR/" echo "Installing JAR into local Maven repo" diff --git a/dev/release/datafusion-java-rm/build-native-libs.sh b/dev/release/datafusion-java-rm/build-native-libs.sh index 5f273cc..79f8ae0 100755 --- a/dev/release/datafusion-java-rm/build-native-libs.sh +++ b/dev/release/datafusion-java-rm/build-native-libs.sh @@ -38,8 +38,9 @@ git clone "$REPO" datafusion-java cd datafusion-java git checkout "$BRANCH" -cd native -cargo build --release +# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml), +# not the per-crate `native/target/`, so build from the repo root. 
+cargo build --release -p datafusion-jni -echo "Built $(pwd)/target/release/libdatafusion_jni.so" -ls -l target/release/libdatafusion_jni.so +echo "Built $(pwd)/rust-target/release/libdatafusion_jni.so" +ls -l rust-target/release/libdatafusion_jni.so diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 81d83e8..3dbd90f 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -7,7 +7,7 @@ .mvn/wrapper/maven-wrapper.properties mvnw mvnw.cmd -native/Cargo.lock +Cargo.lock dev/release/rat_exclude_files.txt docs/source/_static/** docs/source/conf.py diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index e486adc..c7767bf 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -150,7 +150,8 @@ test_source_distribution() { # raises on any formatting errors rustup component add rustfmt - (cd native && cargo fmt --all -- --check) + # Workspace-wide: covers native, native-common, and any future members. + cargo fmt --all -- --check # build native + JVM and run the full test suite make test diff --git a/docs/datafusion-spark-design.md b/docs/datafusion-spark-design.md new file mode 100644 index 0000000..1019f3b --- /dev/null +++ b/docs/datafusion-spark-design.md @@ -0,0 +1,304 @@ +# DataFusion-backed Spark DataSource: design + +## Goal + +Let Spark read from a DataFusion `TableProvider` as a native `DataSourceV2`, +with the native boundary placed at the **Arrow C Data / C Stream interface and +plain C types** — not at handwritten JNI per operation. + +## Origin + +On [PR #104](https://github.com/apache/datafusion-java/pull/104), Dewey +Dunnington (@paleolimbot) reviewed an earlier stack (PR #103) whose cdylib +exported JNI entry points directly, and argued for a cleaner shape: + +> build a cdylib that exports entrypoints that just use the Arrow C Data/Stream +> interface and C types. 
That also has broader applicability to non-Java (i.e., +> can live in datafusion proper and get eyes/reviews from a wider audience). + +This design follows that: the reusable artifact is a **plain-C scan ABI** over +Arrow C types; JNI is a thin, separable adapter; the same ABI is callable from +Python/Go/Rust/FFM. We call this "Approach A": the providers we ship are compiled into the +cdylib and selected by name, rather than imported over `datafusion-ffi`. + +## Principle: two planes, both zero-copy + +| Plane | Carries | Crosses via | +| --- | --- | --- | +| **Data** | Arrow record batches | Arrow C Stream (`FFI_ArrowArrayStream`) → arrow-java import → Spark `ArrowColumnVector` | +| **Control** | provider name, config, pushdown, partition index | plain-C calls passing `(ptr, len)` and `long` addresses | + +No Arrow data is ever marshaled through JNI. Batches flow through the Arrow C +Data interface, which arrow-java and arrow-rs already speak; the JVM gets real +Arrow vectors and hands them to Spark with no per-cell copy. + +## Architecture + +``` + spark.read.format("datafusion").option("path", ...).load() + │ + ▼ datafusion-spark (Maven module, Java, Spark 4.0) + │ TableProvider → Table → ScanBuilder (projection / filter / limit pushdown) + │ → Scan/Batch → InputPartition[] (serializable: config + request bytes + index) + │ → PartitionReaderFactory → ColumnarPartitionReader + │ + ▼ core: org.apache.datafusion.scan.DatafusionScan (JVM scan API) + │ + NativeScan (6 JNI methods) ──loads──► libdatafusion_scan_jni + │ + ▼ native-jni: datafusion-scan-jni (cdylib) ← thin JVM adapter + │ Java_…_NativeScan_* → calls the scan core; writes FFI_ArrowArrayStream + │ into the address arrow-java allocated + │ + ▼ native-ffi: datafusion-scan-ffi (cdylib + rlib) ← the reusable plain-C ABI + │ df_scan_* (extern "C") → scan core → registered provider builder + │ data plane: FFI_ArrowArrayStream (arrow-rs) + │ + ▼ DataFusion: TableProvider (e.g.
ListingTable) reads the source +``` + +Non-Java consumers (Python/Go/Rust/FFM) bind `df_scan_*` directly and skip the +JNI and Spark layers entirely. + +## Components + +| Path | Crate / module | Role | +| --- | --- | --- | +| `native-ffi/` | `datafusion-scan-ffi` (cdylib + rlib) | The plain-C scan ABI; scan core; provider registry; demo + `datafusion.listing` providers | +| `native-jni/` | `datafusion-scan-jni` (cdylib) | Thin JNI shim over the scan core | +| `core/.../scan/` | part of `datafusion-java` | `NativeScan` (native decls), `ScanNativeLoader`, `DatafusionScan` (JVM API) | +| `spark/` | `datafusion-spark` (Java) | The Spark `DataSourceV2` connector | +| `proto/` | shared | `scan_config.proto`, `scan_request.proto` | + +## The plain-C ABI (`native-ffi/include/datafusion_scan.h`) + +```c +uint64_t df_scan_abi_version(void); +void df_error_free(char* err); + +int32_t df_scan_schema(DfStr provider, DfBytes options, DfBytes partition, + struct ArrowSchema* out_schema, char** out_err); +int32_t df_scan_create(DfStr provider, DfBytes options, DfBytes partition, + int32_t target_partitions, int32_t batch_size, int64_t limit, + const DfKeyValue* config_overrides, size_t config_overrides_len, + const DfStr* projection, size_t projection_len, + const DfBytes* filters, size_t filters_len, + DfScanHandle** out_handle, char** out_err); +int32_t df_scan_partition_count(const DfScanHandle*, int32_t* out_count, char** out_err); +int32_t df_scan_execute_partition(const DfScanHandle*, int32_t partition, + struct ArrowArrayStream* out_stream, char** out_err); +int32_t df_scan_execute(const DfScanHandle*, struct ArrowArrayStream* out_stream, char** out_err); +void df_scan_close(DfScanHandle*); +``` + +Conventions: every fallible call returns `0` / nonzero `DfStatus`, writing a +malloc'd message to `*out_err` (freed by `df_error_free`). The only "rich" types +crossing are the standard Arrow C structs `ArrowSchema` / `ArrowArrayStream`. 
+Each call is wrapped in `catch_unwind` so a Rust panic becomes a status code, +never an unwind across the C boundary. + +Providers are registered by name (`register_provider`) and selected via the +`provider` argument; the `options`/`partition` blobs are opaque to the ABI and +decoded by the registered builder. + +## Wire formats (`proto/`) + +- **`ScanConfig`** — the `options` blob: `provider` name + a `source` oneof + (`ListingSource` reusing the per-format read-option messages, or a `custom` + bytes escape hatch). `ScanPartition` is the per-partition `partition` blob. +- **`ScanRequest`** — the engine's pushdown: `projection` (column names), + `filters` (each a serialized `datafusion.LogicalExprNode`), `limit`, + `target_partitions`, `batch_size`, `config_overrides`. + +`ScanRequest` is decoded by the JNI shim and exploded into `df_scan_create`'s +typed C arguments, rather than passed as one blob — keeping the C ABI typed and +FFM-friendly. Filters reuse DataFusion's own `LogicalExprNode` proto, so the +Java side generates builders and the Rust side decodes with the stock codec from +the same `.proto` — and the encoding is shared with any future Comet path. + +## JNI shim (`native-jni` + `core/.../scan`) + +Six `Java_…NativeScan_*` methods: `providerSchema`, `createScan`, +`partitionCount`, `executeStreamPartition`, `executeStream`, `closeScan`. Each +marshals a `String` + `byte[]`s and `long` addresses; the data plane writes an +`FFI_ArrowArrayStream` into the arrow-java-allocated struct. `DatafusionScan` +wraps these and returns an `ArrowReader` via `Data.importArrayStream`, mirroring +`core`'s existing `DataFrame#collect`. + +## Arrow version strategy (the key integration decision) + +`ArrowColumnVector` is zero-copy only if the vectors we hand it are the **same +arrow-java classes** Spark loaded — i.e. one Arrow in the executor JVM.
So the +connector treats arrow-java as **`provided`**: the cluster supplies it, our +stream import and Spark's `ArrowColumnVector` share it, and columnar works with +whatever Arrow the deployment ships (within an API-compatible window of the +compile baseline, currently Spark 4.0's Arrow 18.1). + +Consequences: + +- **`datafusion-java` (core) stays on Arrow 19** for standalone use; only its + Arrow transitive is excluded from the Spark module. No main downgrade. +- **The Rust side is unaffected.** The Arrow C Data interface is a stable spec, + independent of Arrow library version: `arrow-rs 58` producing an + `FFI_ArrowArrayStream` imports into arrow-java 18 or 19 alike. Verified by the + JVM round-trip test. + +## Spark DataSourceV2 mapping + +| Spark interface | Our class | Behaviour | +| --- | --- | --- | +| `TableProvider`, `DataSourceRegister` | `DatafusionTableProvider` | `"datafusion"` short name; `inferSchema` probes via `df_scan_schema` | +| `Table`, `SupportsRead` | `DatafusionTable` | `BATCH_READ` capability | +| `ScanBuilder` + `SupportsPushDown{RequiredColumns,Filters,Limit}` | `DatafusionScanBuilder` | encodes projection / filters / limit into `ScanRequest` | +| `Scan`, `Batch` | `DatafusionScanImpl` | plans once on the driver for partition count | +| `InputPartition` | `DatafusionInputPartition` | **serializable**: carries config + request bytes + index, never a native handle | +| `PartitionReaderFactory` | `DatafusionPartitionReaderFactory` | columnar reads | +| `PartitionReader` | `DatafusionColumnarPartitionReader` | wraps imported Arrow vectors in `ArrowColumnVector`, zero-copy | + +Helpers: `OptionsCodec` (Spark options → `ScanConfig`), `SchemaConverter` (Arrow +schema → Spark `StructType`, using only our Arrow types), `SparkFilters` (Spark +`Filter`s → `LogicalExprNode`: comparisons, `And`/`Or`/`Not`, `IsNull`/ +`IsNotNull` over primitive literals; anything else falls back to Spark). 
+ +**Partition serialization constraint:** a native handle is meaningless in +another executor process, so partitions carry only bytes + an index, and each +executor rebuilds the provider and runs its own partition. A limited plan +coalesces to one partition, so `pushLimit` can report the bound as fully +handled. + +## Testing + +| Level | Where | Proves | +| --- | --- | --- | +| Rust ABI round-trip | `native-ffi/tests/roundtrip.rs` | `df_scan_*` + import the stream back via the Arrow C Stream interface; partition count; limit; error/status | +| Rust proto | `native-ffi/tests/proto.rs` | `ScanConfig`/`ScanRequest` encode/decode incl. embedded read-options | +| Rust listing | `native-ffi/tests/listing.rs` | real `ListingTable` over a CSV, schema inference, full scan | +| JVM scan | `core/.../scan/DatafusionScanTest` | end-to-end Java → JNI → Arrow C Stream; schema, scan, projection, filter, limit (closes the arrow-rs 58 ↔ arrow-java 19 ABI question) | +| Spark unit | `spark/.../DatafusionScanBuilderTest` | decodes the built `ScanRequest` to prove pushdown is actually encoded (isolated from Spark's own handling) | +| Spark E2E | `spark/.../DatafusionSourceTest` | local `SparkSession` over `format("datafusion")`: schema, full scan, projection, filter, limit on Spark 4.0 columnar | + +## Decisions log + +- **Approach A over `datafusion-ffi` import.** `datafusion-ffi` already exposes + the whole `TableProvider`, but over stabby vtables + an async, poll-based + `FFI_RecordBatchStream` — not Java-consumable and not flat C. Compiling + providers in and exporting flat C is simpler and is exactly the shape Dewey + asked for. The async surface would only be needed to load *third-party* + provider cdylibs (a future option B). +- **Plain C + thin JNI, not JNI-in-the-cdylib.** Keeps the reusable artifact + language-neutral and upstreamable; quarantines the JVM into a ~6-method shim. 
+- **Row-based → columnar.** Shipped row-based first to decouple from Spark's + Arrow, then moved to columnar once the `provided`-Arrow strategy removed the + version clash. Columnar is zero-copy; row-based is gone. +- **Spark 4.0 / Arrow 18.1 baseline, Java.** Java matches the rest of the stack; + Spark 4.0's Arrow (18.1) is close to ours and Java-17 native. + +## Status and gaps + +Built and green end to end: the plain-C ABI, the JNI shim, and a columnar Spark +4.0 connector with projection / filter / limit pushdown. + +Not yet done: + +- **Multi-partition coverage.** The executor-rebuild path is wired but exercised + only at one partition (single CSV); a directory/Parquet test would cover N>1. +- **Native library packaging.** The shim loads from `java.library.path`; + classpath bundling per OS/arch (as `core` does for `datafusion_jni`) is left + to release packaging. +- **Format breadth.** CSV options are fully mapped; Parquet/Avro/Arrow use + defaults. +- **External provider cdylibs (option B).** Loading third-party providers over + `datafusion-ffi`'s `ForeignTableProvider` is not implemented. + +## Alternative / companion front-end: ADBC + +A reviewer suggested exposing arbitrary DataFusion `TableProvider`s over +[ADBC](https://arrow.apache.org/adbc/) (Arrow Database Connectivity) instead of — +or alongside — this scan ABI. The two are not mutually exclusive: they are two +front-ends over the same core, serving different consumers. + +### What this PR's work reuses + +The PR already cleaves at the right seam. Three layers, and the valuable two are +front-end-agnostic: + +| Layer | ADBC reuse | +| --- | --- | +| Exec core (`scan.rs`, `reader.rs`, `runtime.rs`) — build provider → register on `SessionContext` → plan → `ExecutionPlan` → partition stream → `FFI_ArrowArrayStream` | **Direct reuse.** Already JVM-free and C-free. 
| +| Provider registry (`registry.rs`) — register `TableProvider` by name, build on demand | **Direct reuse.** This *is* the "arbitrary providers" mechanism. | +| `native-common` (errors, tokio handle); panic→status `catch_unwind` pattern | Reuse concept; ADBC has its own error struct. | +| `df_scan_*` flat C ABI, proto pushdown (`ScanRequest` / `SparkFilters` / `LogicalExprNode`), JNI shim, `core/scan/*`, `spark/*` | **Not reused.** Scan-, JVM-, and Spark-specific. | + +`reader.rs`'s `StreamingReader` (DataFusion `SendableRecordBatchStream` → +`ArrowArrayStream`) is exactly what ADBC's `AdbcStatementExecuteQuery` returns: +the data plane is identical, and the cross-implementation Arrow C Stream question +this PR already answered carries over unchanged. + +### What ADBC adds, and what it drops + +ADBC mandates a fixed, large C surface — `AdbcDatabase` / `AdbcConnection` / +`AdbcStatement` lifecycle, option getters/setters, metadata calls, an +`AdbcDriverInit` entry point. You do **not** hand-write that vtable: the official +`adbc_core` Rust crate supplies `Database` / `Connection` / `Statement` traits +plus an `export_driver!` macro that generates the C ABI. So the FFI layer becomes +trait glue, not a second hand-written boundary. + +New work: + +- `adbc_core` dependency + three trait impls. `Database` holds config + registered + providers; `Connection` wraps a `SessionContext`; `Statement` holds SQL + bound + params and, on execute, runs `ctx.sql(q)` → physical plan → the existing + `StreamingReader`. +- Catalog metadata methods (`GetObjects` / `GetTableSchema` / `GetTableTypes` / + `GetInfo`) → DataFusion `CatalogProvider` / `SchemaProvider` introspection. +- ADBC error / status mapping in place of `DfStatus`. +- Optional: parameter binding / prepared statements; `ExecutePartitions` (maps + cleanly onto the existing plan-partition logic); ingest/write (likely out of + scope). +- Driver packaging (a manifest so `adbc_driver_manager` can load the library). 
+ +Dropped relative to the Spark path: the protobuf pushdown machinery +(`ScanRequest`, `SparkFilters`, `LogicalExprNode` encoding) is unneeded — ADBC +clients send SQL and DataFusion's optimizer does pushdown internally — as are the +JNI shim, `core/scan`, and the Spark module. + +### Suggested layout for both + +``` +native-common/ errors, tokio runtime [shared] +native-exec-core/ provider registry + plan/exec [shared] ← lift scan.rs/reader.rs/registry.rs here + ├─ native-ffi/ df_scan_* flat C (+ JNI/Spark) [exists] + └─ native-adbc/ adbc_core trait impls [new] +``` + +One refactor on the existing side: lift `scan.rs` / `reader.rs` / `registry.rs` +out of `native-ffi` into a shared `native-exec-core` crate that both front-ends +depend on; `native-ffi` keeps only `abi.rs` + proto. Low churn — those modules +are already free of C/JVM concerns by design. + +### Why keep both rather than collapse to one + +Different consumers. `df_scan_*` is a bespoke, scan-only ABI with **explicit** +pushdown: every consumer hand-binds it, but it can carry Spark's pre-resolved +predicates without a SQL round-trip. ADBC is a SQL-oriented **standard** ABI: +bigger mandated surface, but the whole client ecosystem (Python +`adbc_driver_manager`, R, Go, the JDBC↔ADBC bridge) comes for free. + +They are not redundant, because Spark's pre-resolved pushdown does not always +re-serialize to a SQL string: + +- **Lossy but rescuable** (within current filter scope): float/double literals + (decimal-text render loses exact IEEE bits), `NaN`/`±Inf` (no SQL literal), + decimal precision/scale, binary/non-UTF8 literals, null-safe equality + (`<=>` → `IS NOT DISTINCT FROM`), identifier quoting/case. ADBC parameter + binding (`WHERE col = ?` with a typed bound value) closes most of the literal + cases. 
+- **Structurally impossible**: pushdown whose value is not known at + statement-prepare time — dynamic partition pruning, runtime/bloom filters from + joins — cannot be a static SQL string, and binding does not help because the + value arrives mid-execution. This PR pushes none of these yet, but it is the + reason a typed-`Expr` scan ABI is not merely a convenience over SQL: it is the + only path that can carry runtime filters at all. + +So the recommendation is a shared `native-exec-core` with two thin front-ends: +ADBC for SQL clients across the Arrow ecosystem, the flat-C scan ABI for +embedders (Spark today) that push pre-resolved or runtime predicates. diff --git a/docs/source/contributor-guide/development.md b/docs/source/contributor-guide/development.md index 984d77c..61d4fb0 100644 --- a/docs/source/contributor-guide/development.md +++ b/docs/source/contributor-guide/development.md @@ -42,7 +42,7 @@ This builds the native Rust crate and runs the JUnit tests. The steps can be run individually: ```sh -cd native && cargo build +cargo build --workspace ./mvnw test ``` @@ -74,6 +74,11 @@ disk space. The repository is a multi-module Maven build: +- `Cargo.toml` — Rust workspace root declaring the crate members + (`native`, `native-common`) and `[workspace.dependencies]` that pin + shared versions in one place. Cargo writes artifacts to `rust-target/` + (overridden in `.cargo/config.toml`) so `mvn clean` at the repo root does + not nuke the Rust build cache. - `pom.xml` — parent POM declaring the `core` and `examples` modules and shared plugin/dependency versions. - `core/` — `datafusion-java` library module (Java sources, tests, and @@ -81,7 +86,10 @@ The repository is a multi-module Maven build: - `examples/` — `datafusion-java-examples` module containing runnable examples that depend on the library; built alongside the library so they cannot fall out of sync with the API. -- `native/` — Rust crate (JNI + Arrow C Data Interface). 
+- `native/` — `datafusion-jni` Rust crate (JNI + Arrow C Data Interface). +- `native-common/` — `datafusion-jni-common` Rust crate: JNI plumbing + shared across native crates (error→exception mapping, the per-cdylib + Tokio runtime singleton, the async-stream→`FFI_ArrowArrayStream` bridge). - `proto/` — Protobuf definitions shared between Java and Rust. - `Makefile` — top-level build orchestration (`make test`, `make format`, `make tpch-data`). diff --git a/docs/source/contributor-guide/updating-datafusion-version.md b/docs/source/contributor-guide/updating-datafusion-version.md index 56d50dc..6e3b90b 100644 --- a/docs/source/contributor-guide/updating-datafusion-version.md +++ b/docs/source/contributor-guide/updating-datafusion-version.md @@ -21,7 +21,9 @@ under the License. Three things must move together when bumping DataFusion: -1. `native/Cargo.toml` — the `datafusion` crate dependency. +1. `Cargo.toml` (workspace root) — the `datafusion`, `datafusion-proto`, + `datafusion-spark`, and `datafusion-substrait` entries in + `[workspace.dependencies]`. Members inherit from there. 2. `pom.xml` — the `` Maven property. **Must equal the Cargo version**; a mismatch means JVM-built protobuf plans won't deserialize on the native side. @@ -32,9 +34,9 @@ Three things must move together when bumping DataFusion: ## Recipe ```sh -# 1. Bump the Cargo dep -$EDITOR native/Cargo.toml # set datafusion = "" -(cd native && cargo update -p datafusion) +# 1. Bump the workspace dep +$EDITOR Cargo.toml # set datafusion = "" in [workspace.dependencies] +cargo update -p datafusion # 2. Bump the Maven property to match $EDITOR pom.xml # set diff --git a/native-common/Cargo.toml b/native-common/Cargo.toml new file mode 100644 index 0000000..21a2296 --- /dev/null +++ b/native-common/Cargo.toml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-jni-common" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +# Implementation detail of datafusion-java's native crates, not a standalone +# crates.io library. Matches `publish = false` on the `datafusion-jni` crate. +publish = false +readme = "README.md" +description = "Shared JNI plumbing for DataFusion Java native crates: error-to-exception mapping, the per-cdylib Tokio runtime singleton, and the async-stream-to-FFI_ArrowArrayStream bridge." + +[features] +# `datafusion-jni` builds DataFusion with `avro`, which adds the +# `DataFusionError::AvroError` variant our classifier maps to IoException. +# Feature-forwarded so consumers that don't read Avro (the Spark helper) +# don't pull the apache-avro stack into their cdylib. +avro = ["datafusion/avro"] + +[dependencies] +datafusion = { workspace = true } +futures = { workspace = true } +jni = { workspace = true } +tokio = { workspace = true } diff --git a/native-common/README.md b/native-common/README.md new file mode 100644 index 0000000..aadf877 --- /dev/null +++ b/native-common/README.md @@ -0,0 +1,37 @@ + + +# datafusion-jni-common + +Shared JNI plumbing for the [Apache DataFusion Java](https://github.com/apache/datafusion-java) +native crates. 
It holds the pieces every DataFusion-backed `cdylib` loaded into a +JVM needs, factored out so they live in one place. + +## Linking model + +Each consuming `cdylib` statically links its own copy of this crate, so the +runtime singleton is per-library, not per-process. Nothing here is exported with +`#[no_mangle]`, so linking it into several `cdylib`s loaded in one JVM cannot +collide. + +## Status + +This crate is an implementation detail of Apache DataFusion Java. Its API may +change between releases to track the needs of the native crates that depend on +it. diff --git a/native/src/errors.rs b/native-common/src/errors.rs similarity index 95% rename from native/src/errors.rs rename to native-common/src/errors.rs index d926544..f9dbb03 100644 --- a/native/src/errors.rs +++ b/native-common/src/errors.rs @@ -96,8 +96,11 @@ fn classify(err: &DataFusionError) -> &'static str { } DataFusionError::IoError(_) | DataFusionError::ObjectStore(_) - | DataFusionError::ParquetError(_) - | DataFusionError::AvroError(_) => "org/apache/datafusion/IoException", + | DataFusionError::ParquetError(_) => "org/apache/datafusion/IoException", + // The AvroError variant only exists when DataFusion is built with its + // `avro` feature, forwarded by this crate's own `avro` feature. + #[cfg(feature = "avro")] + DataFusionError::AvroError(_) => "org/apache/datafusion/IoException", // ArrowError is a 21-variant grab bag -- only some of those variants // are actually IO-shaped. DivideByZero / ArithmeticOverflow / Compute // / Cast / InvalidArgument / Memory etc. are execution-time failures @@ -161,7 +164,10 @@ fn throw(env: &mut JNIEnv, class: &str, message: &str) { let _ = env.throw_new(class, message); } -fn panic_message(panic: &Box) -> String { +/// Best-effort extraction of a panic payload's message. `catch_unwind` hands +/// back a `Box`; the payload is a `String` or `&str` for ordinary +/// `panic!`/`unwrap` sites, anything else is opaque. 
+pub fn panic_message(panic: &Box<dyn std::any::Any + Send>) -> String { if let Some(s) = panic.downcast_ref::<String>() { s.clone() } else if let Some(s) = panic.downcast_ref::<&str>() { diff --git a/native-common/src/lib.rs b/native-common/src/lib.rs new file mode 100644 index 0000000..ba47004 --- /dev/null +++ b/native-common/src/lib.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! JNI plumbing shared by this workspace's native crates (`datafusion-jni` +//! and `datafusion-spark-bridge`, and through the latter every bridge +//! cdylib): the error-to-Java-exception mapping, the per-cdylib Tokio +//! runtime singleton, and the async-stream-to-`FFI_ArrowArrayStream` +//! bridge. +//! +//! Each cdylib statically links its own copy of this rlib, so [`runtime`] is +//! a per-cdylib singleton -- exactly the behaviour each crate had when this +//! code lived inline. Nothing here is exported with `#[no_mangle]`, so +//! linking this crate into several cdylibs loaded in one JVM cannot collide. 
+ +pub mod errors; + +use std::panic::{catch_unwind, AssertUnwindSafe}; +use std::sync::OnceLock; + +use datafusion::arrow::array::RecordBatch; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::arrow::error::ArrowError; +use datafusion::arrow::record_batch::RecordBatchReader; +use datafusion::execution::SendableRecordBatchStream; +use futures::StreamExt; +use tokio::runtime::{Handle, Runtime}; + +static RT: OnceLock<Runtime> = OnceLock::new(); + +/// The cdylib-wide Tokio runtime. +pub fn runtime() -> &'static Runtime { + runtime_with_init(|_| {}) +} + +/// Same singleton as [`runtime`], with a hook that runs exactly once, when +/// the runtime is created. `datafusion-jni` uses it to install its +/// runtime-metrics accumulator so the sampling baseline coincides with +/// runtime start; every later call (either entry point) returns the existing +/// runtime without invoking the hook. +pub fn runtime_with_init(init: impl FnOnce(&Handle)) -> &'static Runtime { + RT.get_or_init(|| { + let rt = Runtime::new().expect("failed to create Tokio runtime"); + init(rt.handle()); + rt + }) +} + +/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous +/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore +/// the Java `ArrowReader`) consumes. Each call to `next()` drives one +/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the +/// executor pipeline plus a single in-flight batch. +pub struct StreamingReader { + pub schema: SchemaRef, + pub stream: SendableRecordBatchStream, +} + +impl Iterator for StreamingReader { + type Item = Result<RecordBatch, ArrowError>; + + fn next(&mut self) -> Option<Self::Item> { + // Arrow's C ABI invokes this iterator through FFI_ArrowArrayStream's + // vtable, outside the JNI handler's try_unwrap_or_throw guard. A panic + // here (buggy UDF, arrow cast that panics, runtime poison) would + // unwind across C/FFI -- undefined behaviour. 
Catch it and surface as + // an ArrowError so the Java side sees a normal exception instead. + let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next()))); + match next { + Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))), + Err(panic) => { + let msg = errors::panic_message(&panic); + Some(Err(ArrowError::ExternalError( + format!("panic in DataFrame stream: {msg}").into(), + ))) + } + } + } +} + +impl RecordBatchReader for StreamingReader { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} diff --git a/native-ffi/Cargo.toml b/native-ffi/Cargo.toml new file mode 100644 index 0000000..cd97d2b --- /dev/null +++ b/native-ffi/Cargo.toml @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-scan-ffi" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +# Not published yet; this is the in-tree home of the plain-C scan ABI while it +# stabilizes. The intent is for this surface to eventually live in DataFusion +# proper (it has no JVM/JNI dependency), so keep it free of anything +# Java-specific. 
+publish = false + +[lib] +# `cdylib` -> the shippable plain-C shared library (`libdatafusion_scan_ffi`). +# `rlib` -> lets a downstream cdylib statically link this crate, register +# its own providers, and re-export the `df_scan_*` symbols; also +# gives `cargo test` a Rust harness that round-trips the ABI with +# no JVM in sight. +crate-type = ["cdylib", "rlib"] + +[features] +# A built-in in-memory provider builder registered under `datafusion.memory`, +# used by the round-trip tests and handy as a reference builder. Off by default +# so a production cdylib only carries the providers it registers itself. +demo-providers = [] + +[dependencies] +# The arrow C Data / C Stream interface types are the entire data plane of this +# ABI. `ffi` pulls in both `arrow::ffi` (FFI_ArrowSchema/Array) and +# `arrow::ffi_stream` (FFI_ArrowArrayStream). Same crate+version DataFusion +# links, so the types unify. +arrow = { workspace = true } +# `avro` enables AvroFormat for the listing provider; parquet/csv/json/arrow +# formats are on by default. +datafusion = { workspace = true, features = ["avro"] } +# Pushed filters arrive as serialized `datafusion.LogicalExprNode` protobufs -- +# the same vocabulary `datafusion-ffi` already uses, so the encoder is shared +# with any future Comet path. +datafusion-proto = { workspace = true } +futures = { workspace = true } +prost = { workspace = true } +tokio = { workspace = true } + +[dev-dependencies] +# Round-trip tests import the produced FFI_ArrowArrayStream back into Rust via +# the same C Stream interface a Java/Python/Go consumer would use. +datafusion-scan-ffi = { path = ".", features = ["demo-providers"] } + +[build-dependencies] +# Compiles scan_config.proto / scan_request.proto (and the per-format read +# option messages they embed) into Rust so provider builders can decode the +# `options` blob. Mirrors `native/build.rs`. 
+prost-build = { workspace = true } +protoc-bin-vendored = { workspace = true } diff --git a/native-ffi/build.rs b/native-ffi/build.rs new file mode 100644 index 0000000..a1be583 --- /dev/null +++ b/native-ffi/build.rs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +fn main() { + // scan_config.proto embeds the per-format read-option messages, which in + // turn import file_compression_type; every transitively-referenced file + // must be compiled so the generated `ScanConfig` has its field types. + const PROTOS: &[&str] = &[ + "../proto/scan_config.proto", + "../proto/scan_request.proto", + "../proto/file_compression_type.proto", + "../proto/csv_read_options.proto", + "../proto/json_read_options.proto", + "../proto/parquet_read_options.proto", + "../proto/avro_read_options.proto", + "../proto/arrow_read_options.proto", + ]; + for p in PROTOS { + println!("cargo:rerun-if-changed={p}"); + } + // Honor a caller-provided PROTOC (e.g. a system install) and otherwise fall + // back to the vendored binary, matching `native/build.rs`. 
+ if std::env::var_os("PROTOC").is_none() { + let protoc = protoc_bin_vendored::protoc_bin_path().expect("vendored protoc not available"); + std::env::set_var("PROTOC", protoc); + } + prost_build::compile_protos(PROTOS, &["../proto"]).expect("failed to compile protos"); +} diff --git a/native-ffi/include/datafusion_scan.h b/native-ffi/include/datafusion_scan.h new file mode 100644 index 0000000..afa6a2e --- /dev/null +++ b/native-ffi/include/datafusion_scan.h @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Plain-C scan ABI over the Arrow C Data / C Stream interface. +// +// The only "rich" types crossing this boundary are the standard Arrow C +// structs `ArrowSchema` and `ArrowArrayStream` (from Arrow's abi.h), which any +// Arrow implementation can produce/consume. Everything else is C primitives +// and borrowed (ptr, len) views. No JVM/JNI types appear here, by design. 
+ +#ifndef DATAFUSION_SCAN_H +#define DATAFUSION_SCAN_H + +#include <stddef.h> +#include <stdint.h> + +#include "arrow/c/abi.h" // struct ArrowSchema, struct ArrowArrayStream + +#ifdef __cplusplus +extern "C" { +#endif + +// --- Status codes ---------------------------------------------------------- +// 0 on success; nonzero classifies the failure. On error the call also writes +// a malloc'd, NUL-terminated message to *out_err (free with df_error_free). +typedef enum { + DF_OK = 0, + DF_INVALID_ARGUMENT = 1, + DF_UNKNOWN_PROVIDER = 2, + DF_PROVIDER_BUILD = 3, + DF_PLANNING = 4, + DF_EXECUTION = 5, + DF_PANIC = 6, + DF_INTERNAL = 7 +} DfStatus; + +// --- Borrowed input views (caller owns the memory) ------------------------- +typedef struct { + const uint8_t* ptr; // UTF-8, not NUL-terminated; may be null if len == 0 + size_t len; +} DfStr; + +typedef struct { + const uint8_t* ptr; // may be null if len == 0 + size_t len; +} DfBytes; + +typedef struct { + DfStr key; + DfStr value; +} DfKeyValue; + +// Opaque planned-scan handle. +typedef struct DfScanHandle DfScanHandle; + +// --- Lifecycle / versioning ------------------------------------------------ + +// ABI major version; compare before any other call. +uint64_t df_scan_abi_version(void); + +// Free a message previously written to an out_err argument (null-safe). +void df_error_free(char* err); + +// --- Scan API -------------------------------------------------------------- + +// Probe a provider's output schema into the caller-allocated out_schema. +int32_t df_scan_schema(DfStr provider, DfBytes options, DfBytes partition, + struct ArrowSchema* out_schema, char** out_err); + +// Plan a scan. On success writes an owned handle to *out_handle (release with +// df_scan_close). projection is an array of column-name DfStr (empty = all); +// filters is an array of serialized datafusion.LogicalExprNode DfBytes; +// target_partitions / batch_size <= 0 keep DataFusion defaults; limit < 0 means +// no row limit. 
+int32_t df_scan_create(DfStr provider, DfBytes options, DfBytes partition, + int32_t target_partitions, int32_t batch_size, int64_t limit, + const DfKeyValue* config_overrides, size_t config_overrides_len, + const DfStr* projection, size_t projection_len, + const DfBytes* filters, size_t filters_len, + DfScanHandle** out_handle, char** out_err); + +// Output partition count of the planned scan. +int32_t df_scan_partition_count(const DfScanHandle* handle, int32_t* out_count, + char** out_err); + +// Execute one partition into the caller-allocated Arrow C Stream. +int32_t df_scan_execute_partition(const DfScanHandle* handle, int32_t partition, + struct ArrowArrayStream* out_stream, char** out_err); + +// Execute the whole plan as a single coalesced Arrow C Stream. +int32_t df_scan_execute(const DfScanHandle* handle, + struct ArrowArrayStream* out_stream, char** out_err); + +// Drop a planned scan (null-safe). Must not race an in-flight execute on the +// same handle. +void df_scan_close(DfScanHandle* handle); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // DATAFUSION_SCAN_H diff --git a/native-ffi/src/abi.rs b/native-ffi/src/abi.rs new file mode 100644 index 0000000..f037ad5 --- /dev/null +++ b/native-ffi/src/abi.rs @@ -0,0 +1,268 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! The plain-C front door: `extern "C"` entry points over C and Arrow C types. +//! +//! No `JNIEnv`, no JVM types, no name mangling -- the exported symbols are +//! `df_scan_*` / `df_error_*` and the only "rich" types that cross are the +//! standard Arrow C Data (`ArrowSchema`) and C Stream (`ArrowArrayStream`) +//! structs. A Java consumer reaches these through a ~2-method JNI shim or the +//! JDK 22+ FFM API; Python/Go/R/Rust reach them directly. +//! +//! Convention: every fallible call returns `0` on success and a nonzero +//! [`DfStatus`](crate::error::DfStatus) on failure, writing a malloc'd message +//! to `*out_err` (freed via [`df_error_free`]). Each is wrapped in +//! `catch_unwind` so a Rust panic becomes [`DfStatus::Panic`] instead of +//! unwinding across the C boundary (UB). + +use std::ffi::c_char; +use std::os::raw::c_int; +use std::panic::{catch_unwind, AssertUnwindSafe}; + +use datafusion::arrow::ffi::FFI_ArrowSchema; +use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream; + +use crate::error::{finish, report, DfStatus, ScanError, ScanResult}; +use crate::ffi_types::{array, DfBytes, DfKeyValue, DfStr}; +use crate::reader::panic_message; +use crate::scan::{self, ScanHandle, ScanRequest}; + +/// Opaque handle to a planned scan. Created by [`df_scan_create`], freed by +/// [`df_scan_close`]. Never dereferenced by the consumer. +pub struct DfScanHandle { + inner: ScanHandle, +} + +/// Run `body`, turning a caught panic into a [`DfStatus::Panic`] status. +/// +/// # Safety +/// `out_err` must be null or a writable `*mut *mut c_char`. 
+unsafe fn guard(out_err: *mut *mut c_char, body: impl FnOnce() -> ScanResult<()>) -> c_int { + match catch_unwind(AssertUnwindSafe(body)) { + Ok(result) => finish(out_err, result), + Err(p) => report( + out_err, + ScanError::new( + DfStatus::Panic, + format!("panic in datafusion-scan-ffi: {}", panic_message(&p)), + ), + ), + } +} + +/// Major version of the ABI. A consumer compares this against the value it was +/// compiled for before calling anything else. +#[no_mangle] +pub extern "C" fn df_scan_abi_version() -> u64 { + crate::ABI_VERSION +} + +/// Free an error string previously written to an `out_err` argument. Safe to +/// call with null. +/// +/// # Safety +/// `err` must be null or a pointer previously returned through `out_err` by +/// one of the `df_scan_*` calls, and must not be used afterwards. +#[no_mangle] +pub unsafe extern "C" fn df_error_free(err: *mut c_char) { + if !err.is_null() { + drop(std::ffi::CString::from_raw(err)); + } +} + +/// Probe a provider's output schema, writing an Arrow C Schema into the +/// caller-allocated `out_schema`. +/// +/// # Safety +/// All pointer args follow the documented `(ptr, len)` borrow contract; +/// `out_schema` must point to a writable, uninitialized `ArrowSchema`. +#[no_mangle] +pub unsafe extern "C" fn df_scan_schema( + provider: DfStr, + options: DfBytes, + partition: DfBytes, + out_schema: *mut FFI_ArrowSchema, + out_err: *mut *mut c_char, +) -> c_int { + guard(out_err, || { + if out_schema.is_null() { + return Err(ScanError::invalid_argument("out_schema is null")); + } + let name = provider.as_str()?; + let schema = scan::schema(name, options.as_slice(), partition.as_slice())?; + let ffi = FFI_ArrowSchema::try_from(schema.as_ref())?; + std::ptr::write(out_schema, ffi); + Ok(()) + }) +} + +/// Plan a scan. On success writes an owned [`DfScanHandle`] pointer to +/// `*out_handle`; the caller must release it with [`df_scan_close`]. 
+/// +/// Session config overrides are a single `config_overrides` array of +/// [`DfKeyValue`]. `projection` is an array of column-name [`DfStr`]s (empty +/// selects all). `filters` is an array of serialized `datafusion.LogicalExprNode` +/// [`DfBytes`]. `limit` is the pushed row limit; a negative value means none. +/// +/// # Safety +/// Array args follow the `(ptr, len)` borrow contract; `out_handle` must be a +/// writable `*mut *mut DfScanHandle`. +#[no_mangle] +#[allow(clippy::too_many_arguments)] +pub unsafe extern "C" fn df_scan_create( + provider: DfStr, + options: DfBytes, + partition: DfBytes, + target_partitions: c_int, + batch_size: c_int, + limit: i64, + config_overrides: *const DfKeyValue, + config_overrides_len: usize, + projection: *const DfStr, + projection_len: usize, + filters: *const DfBytes, + filters_len: usize, + out_handle: *mut *mut DfScanHandle, + out_err: *mut *mut c_char, +) -> c_int { + guard(out_err, || { + if out_handle.is_null() { + return Err(ScanError::invalid_argument("out_handle is null")); + } + let provider = provider.as_str()?; + + let overrides = array(config_overrides, config_overrides_len) + .iter() + .map(|kv| Ok((kv.key.as_str()?.to_string(), kv.value.as_str()?.to_string()))) + .collect::<Result<Vec<_>, ScanError>>()?; // sized from the validated slice, not the raw `*_len` + let cols = array(projection, projection_len) + .iter() + .map(|s| Ok(s.as_str()?.to_string())) + .collect::<Result<Vec<_>, ScanError>>()?; + let filter_bytes = array(filters, filters_len) + .iter() + .map(|b| b.as_slice().to_vec()) + .collect::<Vec<_>>(); + + let handle = scan::create(ScanRequest { + provider, + options: options.as_slice(), + partition: partition.as_slice(), + target_partitions, + batch_size, + limit: usize::try_from(limit).ok(), // negative (or not representable) => no limit + config_overrides: overrides, + projection: cols, + filters: filter_bytes, + })?; + + let boxed = Box::new(DfScanHandle { inner: handle }); + std::ptr::write(out_handle, Box::into_raw(boxed));
+ Ok(()) + }) +} + +/// Number of output partitions of the planned scan. +/// +/// # Safety +/// `handle` must be a live pointer from [`df_scan_create`]; `out_count` must be +/// writable. +#[no_mangle] +pub unsafe extern "C" fn df_scan_partition_count( + handle: *const DfScanHandle, + out_count: *mut c_int, + out_err: *mut *mut c_char, +) -> c_int { + guard(out_err, || { + let h = handle + .as_ref() + .ok_or_else(|| ScanError::invalid_argument("scan handle is null"))?; + if out_count.is_null() { + return Err(ScanError::invalid_argument("out_count is null")); + } + std::ptr::write(out_count, h.inner.partition_count() as c_int); + Ok(()) + }) +} + +/// Execute one plan partition, writing an `FFI_ArrowArrayStream` into the +/// caller-allocated `out_stream`. The consumer imports it with its Arrow C +/// Stream importer (e.g. arrow-java `Data.importArrayStream`). +/// +/// # Safety +/// `handle` live; `out_stream` points to a writable, uninitialized +/// `ArrowArrayStream`. +#[no_mangle] +pub unsafe extern "C" fn df_scan_execute_partition( + handle: *const DfScanHandle, + partition: c_int, + out_stream: *mut FFI_ArrowArrayStream, + out_err: *mut *mut c_char, +) -> c_int { + guard(out_err, || { + let h = handle + .as_ref() + .ok_or_else(|| ScanError::invalid_argument("scan handle is null"))?; + if out_stream.is_null() { + return Err(ScanError::invalid_argument("out_stream is null")); + } + if partition < 0 { + return Err(ScanError::invalid_argument("partition index is negative")); + } + let reader = h.inner.execute_partition(partition as usize)?; + let ffi = FFI_ArrowArrayStream::new(Box::new(reader)); + std::ptr::write(out_stream, ffi); + Ok(()) + }) +} + +/// Execute the whole plan as a single coalesced stream. +/// +/// # Safety +/// As [`df_scan_execute_partition`].
+#[no_mangle] +pub unsafe extern "C" fn df_scan_execute( + handle: *const DfScanHandle, + out_stream: *mut FFI_ArrowArrayStream, + out_err: *mut *mut c_char, +) -> c_int { + guard(out_err, || { + let h = handle + .as_ref() + .ok_or_else(|| ScanError::invalid_argument("scan handle is null"))?; + if out_stream.is_null() { + return Err(ScanError::invalid_argument("out_stream is null")); + } + let reader = h.inner.execute_all()?; + let ffi = FFI_ArrowArrayStream::new(Box::new(reader)); + std::ptr::write(out_stream, ffi); + Ok(()) + }) +} + +/// Drop a planned scan. Must not race an in-flight execute on the same handle; +/// the consumer is responsible for that ordering. Safe to call with null. +/// +/// # Safety +/// `handle` must be null or a live pointer from [`df_scan_create`], not used +/// afterwards. +#[no_mangle] +pub unsafe extern "C" fn df_scan_close(handle: *mut DfScanHandle) { + if !handle.is_null() { + drop(Box::from_raw(handle)); + } +} diff --git a/native-ffi/src/demo.rs b/native-ffi/src/demo.rs new file mode 100644 index 0000000..ca27f3b --- /dev/null +++ b/native-ffi/src/demo.rs @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
A reference in-memory provider builder, gated behind the `demo-providers` +//! feature. Registered under `datafusion.memory`; the `options` bytes are +//! ignored. Used by the round-trip tests and as a minimal example of what a +//! real consumer's builder looks like. + +use std::sync::Arc; + +use datafusion::arrow::array::{Int64Array, StringArray}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::catalog::TableProvider; +use datafusion::datasource::MemTable; +use datafusion::prelude::SessionContext; + +use crate::error::{DfStatus, ScanError, ScanResult}; +use crate::registry::register_provider; + +/// Registered builder name for the demo provider. +pub const NAME: &str = "datafusion.memory"; + +/// Register the demo provider. Call once at startup. +pub fn register() { + register_provider(NAME, build); +} + +/// Two-column (`id: Int64`, `name: Utf8`), two-batch in-memory table across +/// two partitions, so partition-count behavior is observable. 
+fn build( + _ctx: &SessionContext, + _options: &[u8], + _partition: &[u8], +) -> ScanResult> { + let schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("name", DataType::Utf8, true), + ])); + + let batch = |ids: Vec, names: Vec<&str>| -> ScanResult { + RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(Int64Array::from(ids)), + Arc::new(StringArray::from(names)), + ], + ) + .map_err(ScanError::from) + }; + + let p0 = batch(vec![1, 2, 3], vec!["a", "b", "c"])?; + let p1 = batch(vec![4, 5], vec!["d", "e"])?; + + MemTable::try_new(schema, vec![vec![p0], vec![p1]]) + .map(|t| Arc::new(t) as Arc) + .map_err(|e| ScanError::new(DfStatus::ProviderBuild, e.to_string())) +} diff --git a/native-ffi/src/error.rs b/native-ffi/src/error.rs new file mode 100644 index 0000000..71d8164 --- /dev/null +++ b/native-ffi/src/error.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Error model for the C ABI. +//! +//! Rust-internal code works with [`ScanError`]; the `extern "C"` layer turns it +//! into an `i32` [`DfStatus`] return plus a heap-allocated message string. No +//! Rust error type ever crosses the boundary -- only a code and UTF-8 bytes. 
+ +use std::ffi::{c_char, CString}; +use std::os::raw::c_int; + +use datafusion::arrow::error::ArrowError; +use datafusion::error::DataFusionError; + +/// Status codes returned by every fallible `df_scan_*` call. `0` is success; +/// the rest classify the failure coarsely so a consumer can branch without +/// parsing the message. Stable across an `ABI_VERSION`. +#[repr(i32)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DfStatus { + Ok = 0, + /// A required pointer argument was null, or a length/index was invalid. + InvalidArgument = 1, + /// `provider` is not a registered builder name. + UnknownProvider = 2, + /// The provider builder itself failed. + ProviderBuild = 3, + /// Planning failed (projection, filter decode, physical planning). + Planning = 4, + /// Stream execution setup failed. + Execution = 5, + /// A Rust panic was caught at the boundary. + Panic = 6, + /// Anything not covered above. + Internal = 7, +} + +/// Internal error carrying a status class and a human-readable message. 
+#[derive(Debug)] +pub struct ScanError { + pub status: DfStatus, + pub message: String, +} + +impl ScanError { + pub fn new(status: DfStatus, message: impl Into<String>) -> Self { + Self { + status, + message: message.into(), + } + } + + pub fn invalid_argument(message: impl Into<String>) -> Self { + Self::new(DfStatus::InvalidArgument, message) + } +} + +impl From<DataFusionError> for ScanError { + fn from(e: DataFusionError) -> Self { + Self::new(DfStatus::Planning, e.to_string()) + } +} + +impl From<ArrowError> for ScanError { + fn from(e: ArrowError) -> Self { + Self::new(DfStatus::Internal, e.to_string()) + } +} + +impl From<prost::DecodeError> for ScanError { + fn from(e: prost::DecodeError) -> Self { + Self::new( + DfStatus::Planning, + format!("failed to decode pushed filter as LogicalExprNode: {e}"), + ) + } +} + +pub type ScanResult<T> = Result<T, ScanError>; + +/// Write `err`'s message into `*out_err` as a freshly allocated, +/// NUL-terminated C string (freed by the caller via `df_error_free`) and +/// return its status code as `c_int`. `out_err` may be null, in which case the +/// message is dropped and only the code is returned. +/// +/// # Safety +/// `out_err` must be null or point to a writable `*mut c_char`. +pub unsafe fn report(out_err: *mut *mut c_char, err: ScanError) -> c_int { + if !out_err.is_null() { + // `CString::new` rejects interior NUL bytes; replace them defensively. + let sanitized = err.message.replace('\0', "\u{fffd}"); + match CString::new(sanitized) { + Ok(c) => *out_err = c.into_raw(), + Err(_) => *out_err = std::ptr::null_mut(), + } + } + err.status as c_int +} + +/// Collapse a `ScanResult<()>` into a status code, reporting any error through +/// `out_err`. +/// +/// # Safety +/// See [`report`].
+pub unsafe fn finish(out_err: *mut *mut c_char, result: ScanResult<()>) -> c_int { + match result { + Ok(()) => DfStatus::Ok as c_int, + Err(e) => report(out_err, e), + } +} diff --git a/native-ffi/src/ffi_types.rs b/native-ffi/src/ffi_types.rs new file mode 100644 index 0000000..a892a62 --- /dev/null +++ b/native-ffi/src/ffi_types.rs @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Borrowed C views passed *into* the ABI. +//! +//! These are non-owning `(ptr, len)` pairs: the caller owns the memory and +//! keeps it valid for the duration of the call. Nothing here is allocated or +//! freed by Rust. Using explicit `(ptr, len)` slices (rather than +//! NUL-terminated strings) means the surface is FFM-friendly and binary-safe. + +use std::slice; + +use crate::error::{ScanError, ScanResult}; + +/// A borrowed UTF-8 string slice. Not NUL-terminated. +#[repr(C)] +#[derive(Clone, Copy)] +pub struct DfStr { + pub ptr: *const u8, + pub len: usize, +} + +/// A borrowed byte slice. +#[repr(C)] +#[derive(Clone, Copy)] +pub struct DfBytes { + pub ptr: *const u8, + pub len: usize, +} + +/// A borrowed `(key, value)` UTF-8 pair, for session config overrides. 
+#[repr(C)] +#[derive(Clone, Copy)] +pub struct DfKeyValue { + pub key: DfStr, + pub value: DfStr, +} + +impl DfStr { + /// # Safety + /// `ptr` must be null or point to `len` valid bytes of UTF-8 that stay + /// alive for the borrow. + pub unsafe fn as_str(&self) -> ScanResult<&str> { + let bytes = self.as_bytes(); + std::str::from_utf8(bytes) + .map_err(|e| ScanError::invalid_argument(format!("argument is not valid UTF-8: {e}"))) + } + + /// # Safety + /// See [`DfStr::as_str`]. + pub unsafe fn as_bytes(&self) -> &[u8] { + if self.ptr.is_null() || self.len == 0 { + &[] + } else { + slice::from_raw_parts(self.ptr, self.len) + } + } +} + +impl DfBytes { + /// # Safety + /// `ptr` must be null or point to `len` valid bytes alive for the borrow. + pub unsafe fn as_slice(&self) -> &[u8] { + if self.ptr.is_null() || self.len == 0 { + &[] + } else { + slice::from_raw_parts(self.ptr, self.len) + } + } +} + +/// View a `(ptr, len)` array argument as a slice, treating null+0 as empty. +/// +/// # Safety +/// `ptr` must be null or point to `len` valid `T` for the borrow. +pub unsafe fn array<'a, T>(ptr: *const T, len: usize) -> &'a [T] { + if ptr.is_null() || len == 0 { + &[] + } else { + slice::from_raw_parts(ptr, len) + } +} diff --git a/native-ffi/src/lib.rs b/native-ffi/src/lib.rs new file mode 100644 index 0000000..0f83e74 --- /dev/null +++ b/native-ffi/src/lib.rs @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A plain-C scan ABI over the Arrow C Data / C Stream interface. +//! +//! This crate exposes a DataFusion [`TableProvider`](datafusion::catalog::TableProvider) +//! scan as a set of `extern "C"` entry points that speak only C types and the +//! Arrow C Data interface. There is **no JVM/JNI dependency**: the front door +//! is callable from Java (via a thin JNI shim or the JDK 22+ FFM API), but also +//! from Python (cffi/ctypes), Go (cgo), R, or another Rust crate. That is the +//! property that lets the surface live close to DataFusion proper and get +//! reviewed by a wider audience -- the request on +//! . +//! +//! # Shape +//! +//! Providers are *compiled into* the final cdylib ("approach A"): a consumer +//! links this crate as an `rlib`, [`register_provider`]s its builders by name, +//! and the `df_scan_*` symbols are exported from the resulting shared library. +//! The data plane never crosses as serialized batches -- each scanned +//! partition is handed back as a standard `FFI_ArrowArrayStream` the consumer +//! imports zero-copy. +//! +//! # The ABI +//! +//! See `include/datafusion_scan.h` for the C header. In brief: +//! +//! - [`abi::df_scan_schema`] -- probe the output schema (Arrow C Schema) +//! - [`abi::df_scan_create`] -- plan a scan, returns an opaque handle +//! - [`abi::df_scan_partition_count`] -- number of output partitions +//! - [`abi::df_scan_execute_partition`] -- one partition -> Arrow C Stream +//! - [`abi::df_scan_execute`] -- whole plan -> Arrow C Stream +//! - [`abi::df_scan_close`] -- drop the handle +//! 
- [`abi::df_error_free`] -- free an error string +//! - [`abi::df_scan_abi_version`] -- ABI major version for compatibility +//! +//! Every fallible call returns `0` on success and a nonzero +//! [`error::DfStatus`] code on failure, setting `*out_err` to a Rust-allocated, +//! NUL-terminated message the caller frees only with `df_error_free`. + +pub mod abi; +pub mod error; +pub mod ffi_types; +pub mod listing; +pub mod reader; +pub mod registry; +pub mod runtime; +pub mod scan; + +/// Generated protobuf types for the scan config / request wire formats +/// (`proto/scan_config.proto`, `proto/scan_request.proto`). The `ScanConfig` +/// blob is decoded by provider builders; `ScanRequest` is the engine-side +/// staging object exploded into the C call's typed arguments. +pub mod proto { + include!(concat!(env!("OUT_DIR"), "/datafusion_java.rs")); +} + +#[cfg(feature = "demo-providers")] +pub mod demo; + +pub use registry::register_provider; + +/// Major version of this ABI. Bumped on any breaking change to a `df_scan_*` +/// signature or to the meaning of its arguments. Consumers compare against the +/// value they were built for via [`abi::df_scan_abi_version`]. +pub const ABI_VERSION: u64 = 1; diff --git a/native-ffi/src/listing.rs b/native-ffi/src/listing.rs new file mode 100644 index 0000000..5b8aed6 --- /dev/null +++ b/native-ffi/src/listing.rs @@ -0,0 +1,226 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! A real file-backed provider builder, registered under `datafusion.listing`. +//! +//! Decodes the [`ScanConfig`](crate::proto::ScanConfig) blob into a DataFusion +//! [`ListingTable`] over one or more paths read with a single file format. +//! Demonstrates a builder that needs the session context: when no explicit +//! schema is supplied it infers one from the data (and the context's object +//! store registry resolves the paths). +//! +//! Object stores for remote URIs (s3://, gs://, ...) must be registered on the +//! context by the embedding cdylib before a scan runs; the default context +//! resolves local paths out of the box. 
+ +use std::io::Cursor; +use std::sync::Arc; + +use datafusion::arrow::datatypes::{Schema, SchemaRef}; +use datafusion::arrow::ipc::reader::StreamReader; +use datafusion::catalog::TableProvider; +use datafusion::datasource::file_format::arrow::ArrowFormat; +use datafusion::datasource::file_format::avro::AvroFormat; +use datafusion::datasource::file_format::csv::CsvFormat; +use datafusion::datasource::file_format::file_compression_type::FileCompressionType; +use datafusion::datasource::file_format::json::JsonFormat; +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::file_format::FileFormat; +use datafusion::datasource::listing::{ + ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, +}; +use datafusion::prelude::SessionContext; +use prost::Message; + +use crate::error::{DfStatus, ScanError, ScanResult}; +use crate::proto::{listing_source, scan_config, FileCompressionType as ProtoCompression}; +use crate::proto::{ListingSource, ScanConfig}; +use crate::registry::register_provider; +use crate::runtime::handle; + +/// Registered builder name for the listing provider. +pub const NAME: &str = "datafusion.listing"; + +/// Register the listing provider. Call once at startup. 
+pub fn register() { + register_provider(NAME, build); +} + +fn build( + ctx: &SessionContext, + options: &[u8], + _partition: &[u8], +) -> ScanResult> { + let config = ScanConfig::decode(options).map_err(|e| { + ScanError::new( + DfStatus::ProviderBuild, + format!("failed to decode ScanConfig: {e}"), + ) + })?; + + let listing = match config.source { + Some(scan_config::Source::Listing(l)) => l, + Some(scan_config::Source::Custom(_)) => { + return Err(ScanError::new( + DfStatus::ProviderBuild, + "datafusion.listing requires a listing source, got custom bytes", + )) + } + None => { + return Err(ScanError::new( + DfStatus::ProviderBuild, + "datafusion.listing requires a listing source, none set", + )) + } + }; + + if listing.paths.is_empty() { + return Err(ScanError::new( + DfStatus::ProviderBuild, + "listing source has no paths", + )); + } + + let table_paths = listing + .paths + .iter() + .map(|p| { + ListingTableUrl::parse(p).map_err(|e| { + ScanError::new(DfStatus::ProviderBuild, format!("invalid path {p:?}: {e}")) + }) + }) + .collect::>>()?; + + let listing_options = listing_options(&listing)?; + + let mut table_config = + ListingTableConfig::new_with_multi_paths(table_paths).with_listing_options(listing_options); + + table_config = match &listing.schema_ipc { + Some(bytes) => table_config.with_schema(schema_from_ipc(bytes)?), + // No explicit schema: infer from the data, using the context's state + // (and thus its object store registry) to read it. + None => handle() + .block_on(table_config.infer_schema(&ctx.state())) + .map_err(|e| { + ScanError::new( + DfStatus::ProviderBuild, + format!("failed to infer listing schema: {e}"), + ) + })?, + }; + + let table = ListingTable::try_new(table_config) + .map_err(|e| ScanError::new(DfStatus::ProviderBuild, e.to_string()))?; + Ok(Arc::new(table)) +} + +/// Map the proto format oneof to a DataFusion [`ListingOptions`]. 
Covers the +/// option fields the read-option messages expose today; unset fields keep the +/// format's defaults. +fn listing_options(listing: &ListingSource) -> ScanResult { + use listing_source::Format; + + let (format, default_ext): (Arc, &str) = match &listing.format { + Some(Format::Csv(c)) => { + let mut fmt = CsvFormat::default() + .with_has_header(c.has_header) + .with_delimiter(byte(c.delimiter, b',')?) + .with_quote(byte(c.quote, b'"')?) + .with_newlines_in_values(c.newlines_in_values.unwrap_or(false)) + .with_file_compression_type(compression(c.file_compression_type)); + if let Some(t) = c.terminator { + fmt = fmt.with_terminator(Some(byte(t, b'\n')?)); + } + if let Some(e) = c.escape { + fmt = fmt.with_escape(Some(byte(e, b'\\')?)); + } + if let Some(cm) = c.comment { + fmt = fmt.with_comment(Some(byte(cm, b'#')?)); + } + (Arc::new(fmt), extension(&c.file_extension, ".csv")) + } + Some(Format::Json(j)) => { + let fmt = JsonFormat::default() + .with_file_compression_type(compression(j.file_compression_type)); + (Arc::new(fmt), extension(&j.file_extension, ".json")) + } + Some(Format::Parquet(p)) => { + // Parquet read tuning (pruning / metadata hints) is applied through + // session config at scan time, not on the format here. + ( + Arc::new(ParquetFormat::default()), + extension(&p.file_extension, ".parquet"), + ) + } + Some(Format::Avro(a)) => (Arc::new(AvroFormat), extension(&a.file_extension, ".avro")), + Some(Format::Arrow(a)) => ( + Arc::new(ArrowFormat), + extension(&a.file_extension, ".arrow"), + ), + None => { + return Err(ScanError::new( + DfStatus::ProviderBuild, + "listing source has no file format", + )) + } + }; + + Ok(ListingOptions::new(format).with_file_extension(default_ext.to_string())) +} + +/// A single byte sent over the wire as a `uint32`. Falls back to `default` when +/// the field is unset (0), and rejects values that do not fit in a byte. 
+fn byte(value: u32, default: u8) -> ScanResult { + if value == 0 { + return Ok(default); + } + u8::try_from(value) + .map_err(|_| ScanError::invalid_argument(format!("byte option {value} exceeds 255"))) +} + +fn extension<'a>(configured: &'a str, default: &'a str) -> &'a str { + if configured.is_empty() { + default + } else { + configured + } +} + +fn compression(value: i32) -> FileCompressionType { + match ProtoCompression::try_from(value) { + Ok(ProtoCompression::Gzip) => FileCompressionType::GZIP, + Ok(ProtoCompression::Bzip2) => FileCompressionType::BZIP2, + Ok(ProtoCompression::Xz) => FileCompressionType::XZ, + Ok(ProtoCompression::Zstd) => FileCompressionType::ZSTD, + // Unspecified / uncompressed / unknown -> uncompressed. + _ => FileCompressionType::UNCOMPRESSED, + } +} + +/// Read a `SchemaRef` from Arrow IPC stream bytes (a schema message, optionally +/// followed by zero batches -- the shape `StreamWriter::finish` produces). +fn schema_from_ipc(bytes: &[u8]) -> ScanResult { + let reader = StreamReader::try_new(Cursor::new(bytes), None).map_err(|e| { + ScanError::new( + DfStatus::ProviderBuild, + format!("failed to read schema_ipc: {e}"), + ) + })?; + let schema: Schema = reader.schema().as_ref().clone(); + Ok(Arc::new(schema)) +} diff --git a/native-ffi/src/reader.rs b/native-ffi/src/reader.rs new file mode 100644 index 0000000..445668e --- /dev/null +++ b/native-ffi/src/reader.rs @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Bridge from DataFusion's async stream to the synchronous +//! [`RecordBatchReader`] that `FFI_ArrowArrayStream` pulls. + +use std::panic::{catch_unwind, AssertUnwindSafe}; + +use datafusion::arrow::array::RecordBatch; +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::arrow::error::ArrowError; +use datafusion::arrow::record_batch::RecordBatchReader; +use datafusion::execution::SendableRecordBatchStream; +use futures::StreamExt; + +use crate::runtime::runtime; + +/// Wraps a [`SendableRecordBatchStream`] as a [`RecordBatchReader`]. Each +/// `next()` drives one `block_on(stream.next())`, so memory stays bounded by +/// the pipeline plus a single in-flight batch. +pub struct StreamingReader { + pub schema: SchemaRef, + pub stream: SendableRecordBatchStream, +} + +impl Iterator for StreamingReader { + type Item = Result<RecordBatch, ArrowError>; + + fn next(&mut self) -> Option<Self::Item> { + // Arrow's C Stream vtable calls this from the *consumer's* thread, + // outside any guard. A panic unwinding across the C boundary is UB, so + // catch it and surface as an ArrowError -- the consumer sees a normal + // stream error (mapped to an exception on the Java side).
+ let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next()))); + match next { + Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))), + Err(panic) => Some(Err(ArrowError::ExternalError( + format!("panic in DataFusion stream: {}", panic_message(&*panic)).into(), + ))), + } + } +} + +impl RecordBatchReader for StreamingReader { + fn schema(&self) -> SchemaRef { + self.schema.clone() + } +} + +/// Best-effort extraction of a panic payload's message (pass `&*boxed`, not `&boxed`). +pub fn panic_message(panic: &(dyn std::any::Any + Send)) -> String { + if let Some(s) = panic.downcast_ref::<&str>() { + (*s).to_string() + } else if let Some(s) = panic.downcast_ref::<String>() { + s.clone() + } else { + "unknown panic".to_string() + } +} diff --git a/native-ffi/src/registry.rs b/native-ffi/src/registry.rs new file mode 100644 index 0000000..bccfb0c --- /dev/null +++ b/native-ffi/src/registry.rs @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Provider builder registry. +//! +//! "Approach A" means the providers ship compiled into the final cdylib rather +//! than being imported over an FFI. A consumer registers each builder by name +//!
at startup; the C ABI selects one by that name and hands it the opaque +//! `options`/`partition` byte blobs it was given. The builder decodes those +//! however it likes (protobuf, JSON, bincode) -- the ABI stays oblivious. + +use std::collections::HashMap; +use std::sync::{Arc, RwLock}; + +use datafusion::catalog::TableProvider; +use datafusion::prelude::SessionContext; + +use crate::error::{DfStatus, ScanError, ScanResult}; + +/// Builds a provider from caller-supplied bytes. +/// +/// * `ctx` -- the scan's session context, already configured with the +/// caller's tuning/overrides. A builder that must infer a schema or read an +/// object store (e.g. a listing table) uses `ctx.state()` for that; simple +/// in-memory providers ignore it. +/// * `options` -- provider-level config (which table, paths, schema, ...). +/// * `partition` -- optional per-partition slice descriptor; empty for a +/// whole-table scan. +/// +/// `options`/`partition` are opaque to the ABI; their encoding is a contract +/// between the registrant and whoever fills the bytes on the other side of the +/// boundary (the in-tree builders use [`crate::proto::ScanConfig`]). +pub type ProviderBuilder = fn( + ctx: &SessionContext, + options: &[u8], + partition: &[u8], +) -> ScanResult>; + +fn registry() -> &'static RwLock> { + static REGISTRY: std::sync::OnceLock>> = + std::sync::OnceLock::new(); + REGISTRY.get_or_init(|| RwLock::new(HashMap::new())) +} + +/// Register `builder` under `name`, replacing any previous registration. +/// Call once per provider at cdylib startup (e.g. from a `#[ctor]` or an +/// exported init function the consumer invokes). +pub fn register_provider(name: impl Into, builder: ProviderBuilder) { + registry() + .write() + .expect("provider registry poisoned") + .insert(name.into(), builder); +} + +/// Look up `name` and build a provider from the given bytes. 
+pub fn build_provider( + name: &str, + ctx: &SessionContext, + options: &[u8], + partition: &[u8], +) -> ScanResult> { + let builder = { + let guard = registry().read().expect("provider registry poisoned"); + guard.get(name).copied() + }; + match builder { + Some(b) => b(ctx, options, partition), + None => Err(ScanError::new( + DfStatus::UnknownProvider, + format!("no provider builder registered under name {name:?}"), + )), + } +} diff --git a/native-ffi/src/runtime.rs b/native-ffi/src/runtime.rs new file mode 100644 index 0000000..87fe2e7 --- /dev/null +++ b/native-ffi/src/runtime.rs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! The cdylib-wide Tokio runtime. +//! +//! DataFusion planning and execution are async; this ABI is synchronous, so +//! every call that awaits does so through this runtime. Statically linked into +//! whatever cdylib embeds this crate, so it is a per-cdylib singleton -- two +//! libraries loaded in one process get independent runtimes and cannot collide. +//! +//! This mirrors `datafusion-jni-common`'s runtime but is deliberately +//! duplicated here so the C ABI carries no dependency on the JNI crate. 
+ +use std::sync::OnceLock; + +use tokio::runtime::{Handle, Runtime}; + +static RT: OnceLock<Runtime> = OnceLock::new(); + +/// The shared multi-thread Tokio runtime, created on first use. +pub fn runtime() -> &'static Runtime { + RT.get_or_init(|| Runtime::new().expect("failed to create Tokio runtime")) +} + +/// Handle to [`runtime`], for `block_on` / `enter`. +pub fn handle() -> &'static Handle { + runtime().handle() +} diff --git a/native-ffi/src/scan.rs b/native-ffi/src/scan.rs new file mode 100644 index 0000000..4a668d0 --- /dev/null +++ b/native-ffi/src/scan.rs @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Planning and execution core, free of any C/JVM concerns. +//! +//! This is the JNI-free port of the logic in PR #103's `spark/bridge/src/scan.rs`: +//! build the provider, register it on a private `SessionContext` with the +//! caller-pinned config, apply the pruned projection and proto-encoded pushed +//! filters, and plan once. The resulting [`ScanHandle`] then yields one +//! independent stream per plan partition. +//! +//! Spark-specific type widening is intentionally **not** here: it is a consumer +//! concern (apply a `WideningTableProvider` decorator inside the registered +//!
builder if you need it), so this core stays a faithful DataFusion scan. + +use std::sync::Arc; + +use datafusion::arrow::datatypes::SchemaRef; +use datafusion::dataframe::DataFrame; +use datafusion::execution::TaskContext; +use datafusion::physical_plan::{execute_stream, ExecutionPlan}; +use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_proto::logical_plan::from_proto::parse_expr; +use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec; +use datafusion_proto::protobuf::LogicalExprNode; +use prost::Message; + +use crate::error::{DfStatus, ScanError, ScanResult}; +use crate::reader::StreamingReader; +use crate::registry::build_provider; +use crate::runtime::handle; + +/// Registration name of the provider on the scan's private context. Never +/// surfaces in SQL (the plan is built through the DataFrame API), so no +/// quoting/collision concern. +const SCAN_TABLE_NAME: &str = "df_scan"; + +/// Inputs to [`create`], decoded from the C arguments by the ABI layer. +pub struct ScanRequest<'a> { + pub provider: &'a str, + pub options: &'a [u8], + pub partition: &'a [u8], + /// `<= 0` leaves the DataFusion default. + pub target_partitions: i32, + /// `<= 0` leaves the DataFusion default. + pub batch_size: i32, + pub config_overrides: Vec<(String, String)>, + /// Column names to project; empty selects all. + pub projection: Vec<String>, + /// Each entry is a serialized `datafusion.LogicalExprNode`. + pub filters: Vec<Vec<u8>>, + /// Optional row limit pushed into the scan. `None` means no limit. + pub limit: Option<usize>, +} + +/// A planned scan. Holds the context alive for the plan's lifetime. +pub struct ScanHandle { + _ctx: SessionContext, + plan: Arc<dyn ExecutionPlan>, + task_ctx: Arc<TaskContext>, +} + +/// Build the provider via the registry and return its output schema, without +/// planning. Mirrors #103's `provider_schema_ipc`, but returns the live +/// `SchemaRef` (the ABI converts it to an Arrow C Schema).
Uses a default +/// context -- enough for schema inference against the default (local) object +/// store; a provider needing custom stores should be built through [`create`]. +pub fn schema(provider: &str, options: &[u8], partition: &[u8]) -> ScanResult<SchemaRef> { + let ctx = SessionContext::new(); + let provider = build_provider(provider, &ctx, options, partition)?; + Ok(provider.schema()) +} + +/// Build, register, project, filter, and plan exactly once. +pub fn create(req: ScanRequest<'_>) -> ScanResult<ScanHandle> { + // Build the context first: a provider may need it (schema inference, object + // store access) at construction time. + let mut config = SessionConfig::new(); + if req.target_partitions > 0 { + config = config.with_target_partitions(req.target_partitions as usize); + } + if req.batch_size > 0 { + config = config.with_batch_size(req.batch_size as usize); + } + for (key, value) in &req.config_overrides { + config.options_mut().set(key, value)?; + } + + let ctx = SessionContext::new_with_config(config); + let provider = build_provider(req.provider, &ctx, req.options, req.partition)?; + ctx.register_table(SCAN_TABLE_NAME, provider)?; + + let mut df: DataFrame = handle().block_on(ctx.table(SCAN_TABLE_NAME))?; + if !req.projection.is_empty() { + let refs: Vec<&str> = req.projection.iter().map(String::as_str).collect(); + df = df.select_columns(&refs)?; + } + for bytes in &req.filters { + let node = LogicalExprNode::decode(bytes.as_slice())?; + // TaskContext implements FunctionRegistry; the default codec suffices + // for the column/literal/builtin expressions a predicate translator + // emits. + let registry = df.task_ctx(); + let expr = parse_expr(&node, &registry, &DefaultLogicalExtensionCodec {}) + .map_err(|e| ScanError::new(DfStatus::Planning, e.to_string()))?; + df = df.filter(expr)?; + } + if let Some(fetch) = req.limit { + df = df.limit(0, Some(fetch))?; + } + + // task_ctx() borrows df; capture before create_physical_plan consumes it.
+ let task_ctx = Arc::new(df.task_ctx()); + let plan = handle().block_on(df.create_physical_plan())?; + + Ok(ScanHandle { + _ctx: ctx, + plan, + task_ctx, + }) +} + +impl ScanHandle { + /// Output partition count of the planned physical plan. + pub fn partition_count(&self) -> usize { + self.plan + .properties() + .output_partitioning() + .partition_count() + } + + /// Open an independent reader over one plan partition. Concurrently + /// callable across partitions: `ExecutionPlan`/`TaskContext` are + /// `Send + Sync`, and each call only clones their `Arc`s. + pub fn execute_partition(&self, partition: usize) -> ScanResult<StreamingReader> { + let count = self.partition_count(); + if partition >= count { + return Err(ScanError::new( + DfStatus::InvalidArgument, + format!("partition index {partition} out of range: plan has {count} partition(s)"), + )); + } + let plan = Arc::clone(&self.plan); + let task_ctx = Arc::clone(&self.task_ctx); + let schema: SchemaRef = plan.schema(); + + // execute() is synchronous but operators may tokio::spawn at + // execute()-time (RepartitionExec et al.), needing a runtime context. + let stream = { + let _guard = handle().enter(); + plan.execute(partition, task_ctx) + .map_err(|e| ScanError::new(DfStatus::Execution, e.to_string()))? + }; + Ok(StreamingReader { schema, stream }) + } + + /// Open one reader over the whole plan (all partitions coalesced). + pub fn execute_all(&self) -> ScanResult<StreamingReader> { + let plan = Arc::clone(&self.plan); + let task_ctx = Arc::clone(&self.task_ctx); + let schema: SchemaRef = plan.schema(); + let stream = { + let _guard = handle().enter(); + execute_stream(plan, task_ctx) + .map_err(|e| ScanError::new(DfStatus::Execution, e.to_string()))?
+ }; + Ok(StreamingReader { schema, stream }) + } +} diff --git a/native-ffi/tests/listing.rs b/native-ffi/tests/listing.rs new file mode 100644 index 0000000..eac5e2e --- /dev/null +++ b/native-ffi/tests/listing.rs @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! End-to-end test of the `datafusion.listing` provider through the plain-C +//! ABI: write a CSV, encode a ScanConfig pointing at it, scan it, and import +//! the result back through the Arrow C Stream interface -- the path a foreign +//! consumer takes. Exercises schema inference (no explicit schema supplied).
+ +use std::ffi::{c_char, CStr}; +use std::fs; +use std::process; +use std::ptr; + +use datafusion::arrow::array::Int64Array; +use datafusion::arrow::ffi::FFI_ArrowSchema; +use datafusion::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}; + +use datafusion_scan_ffi::abi::{ + df_error_free, df_scan_close, df_scan_create, df_scan_execute, df_scan_partition_count, + df_scan_schema, DfScanHandle, +}; +use datafusion_scan_ffi::ffi_types::{DfBytes, DfStr}; +use datafusion_scan_ffi::listing; +use datafusion_scan_ffi::proto::{ + listing_source, scan_config, CsvReadOptionsProto, ListingSource, ScanConfig, +}; +use prost::Message; + +unsafe fn take_err(err: *mut c_char) -> Option<String> { + if err.is_null() { + None + } else { + let s = CStr::from_ptr(err).to_string_lossy().into_owned(); + df_error_free(err); + Some(s) + } +} + +/// Write a CSV to a per-thread file in a per-process temp dir; return (dir, file path). +fn write_csv() -> (std::path::PathBuf, String) { + let dir = std::env::temp_dir().join(format!("df-scan-ffi-{}", process::id())); + fs::create_dir_all(&dir).expect("create temp dir"); + let path = dir.join(format!("data-{:?}.csv", std::thread::current().id())); + fs::write(&path, "id,name\n1,a\n2,b\n3,c\n").expect("write csv"); + (dir, path.to_string_lossy().into_owned()) +} + +/// Encode a ScanConfig for a CSV listing source over `path`.
+fn csv_config(path: &str) -> Vec<u8> { + ScanConfig { + provider: listing::NAME.to_string(), + source: Some(scan_config::Source::Listing(ListingSource { + paths: vec![path.to_string()], + schema_ipc: None, + format: Some(listing_source::Format::Csv(CsvReadOptionsProto { + has_header: true, + delimiter: b',' as u32, + quote: b'"' as u32, + file_extension: ".csv".to_string(), + ..Default::default() + })), + })), + } + .encode_to_vec() +} + +fn provider() -> DfStr { + DfStr { + ptr: listing::NAME.as_ptr(), + len: listing::NAME.len(), + } +} + +fn options(bytes: &[u8]) -> DfBytes { + DfBytes { + ptr: bytes.as_ptr(), + len: bytes.len(), + } +} + +const EMPTY: DfBytes = DfBytes { + ptr: ptr::null(), + len: 0, +}; + +#[test] +fn listing_csv_schema_is_inferred() { + listing::register(); + let (_dir, path) = write_csv(); + let cfg = csv_config(&path); + + let mut schema = FFI_ArrowSchema::empty(); + let mut err: *mut c_char = ptr::null_mut(); + let status = unsafe { df_scan_schema(provider(), options(&cfg), EMPTY, &mut schema, &mut err) }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) }); + + let schema = + datafusion::arrow::datatypes::Schema::try_from(&schema).expect("import FFI_ArrowSchema"); + let names: Vec<_> = schema.fields().iter().map(|f| f.name().as_str()).collect(); + assert_eq!(names, vec!["id", "name"]); +} + +#[test] +fn listing_csv_scans_rows() { + listing::register(); + let (_dir, path) = write_csv(); + let cfg = csv_config(&path); + + // Plan. + let mut handle: *mut DfScanHandle = ptr::null_mut(); + let mut err: *mut c_char = ptr::null_mut(); + let status = unsafe { + df_scan_create( + provider(), + options(&cfg), + EMPTY, + 0, + 0, + -1, + ptr::null(), + 0, + ptr::null(), + 0, + ptr::null(), + 0, + &mut handle, + &mut err, + ) + }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) }); + assert!(!handle.is_null()); + + // Partition count is reported.
+ let mut count = 0i32; + let mut err2: *mut c_char = ptr::null_mut(); + assert_eq!( + unsafe { df_scan_partition_count(handle, &mut count, &mut err2) }, + 0 + ); + assert!(count >= 1, "expected at least one partition, got {count}"); + + // Execute the whole plan as one coalesced stream and sum `id`. + let mut stream = FFI_ArrowArrayStream::empty(); + let mut err3: *mut c_char = ptr::null_mut(); + let status = unsafe { df_scan_execute(handle, &mut stream, &mut err3) }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err3) }); + + let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.expect("import stream"); + let mut total: i64 = 0; + let mut rows = 0usize; + for batch in reader { + let batch = batch.expect("batch"); + rows += batch.num_rows(); + let ids = batch + .column(0) + .as_any() + .downcast_ref::<Int64Array>() + .expect("id is Int64"); + total += ids.values().iter().sum::<i64>(); + } + assert_eq!(rows, 3); + assert_eq!(total, 1 + 2 + 3); + + unsafe { df_scan_close(handle) }; +} diff --git a/native-ffi/tests/proto.rs b/native-ffi/tests/proto.rs new file mode 100644 index 0000000..ac668c7 --- /dev/null +++ b/native-ffi/tests/proto.rs @@ -0,0 +1,93 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//!
Confirms the generated scan-config / scan-request types encode and decode, +//! including a per-format read-option message embedded through the source +//! oneof -- i.e. the imports across `proto/*.proto` resolved at build time. + +use datafusion_scan_ffi::proto::{ + listing_source, scan_config, CsvReadOptionsProto, ListingSource, ScanConfig, ScanRequest, +}; +use prost::Message; + +#[test] +fn scan_config_with_listing_source_roundtrips() { + let config = ScanConfig { + provider: "datafusion.listing".to_string(), + source: Some(scan_config::Source::Listing(ListingSource { + paths: vec!["s3://bucket/data/".to_string()], // never opened; only encoded and decoded + schema_ipc: None, + format: Some(listing_source::Format::Csv(CsvReadOptionsProto { + has_header: true, + delimiter: b',' as u32, + quote: b'"' as u32, + file_extension: ".csv".to_string(), + ..Default::default() + })), + })), + }; + + let bytes = config.encode_to_vec(); + let decoded = ScanConfig::decode(bytes.as_slice()).expect("decode ScanConfig"); + + assert_eq!(decoded.provider, "datafusion.listing"); + match decoded.source { + Some(scan_config::Source::Listing(l)) => { + assert_eq!(l.paths, vec!["s3://bucket/data/".to_string()]); + match l.format { + Some(listing_source::Format::Csv(c)) => { + assert!(c.has_header); + assert_eq!(c.delimiter, b',' as u32); + } + other => panic!("expected CSV format, got {other:?}"), + } + } + other => panic!("expected listing source, got {other:?}"), + } +} + +#[test] +fn scan_request_roundtrips() { + let req = ScanRequest { + projection: vec!["id".to_string(), "name".to_string()], + filters: vec![vec![1, 2, 3], vec![4, 5]], // arbitrary bytes; only the count is asserted + limit: Some(100), + target_partitions: 8, + batch_size: 0, // not asserted after decode + config_overrides: [( + "datafusion.execution.parquet.pushdown_filters".to_string(), + "true".to_string(), + )] + .into_iter() + .collect(), + }; + + let bytes = req.encode_to_vec(); + let decoded = ScanRequest::decode(bytes.as_slice()).expect("decode ScanRequest"); + + assert_eq!(decoded.projection, vec!["id", "name"]); +
assert_eq!(decoded.filters.len(), 2); + assert_eq!(decoded.limit, Some(100)); + assert_eq!(decoded.target_partitions, 8); + assert_eq!( + decoded + .config_overrides + .get("datafusion.execution.parquet.pushdown_filters") + .map(String::as_str), + Some("true") + ); +} diff --git a/native-ffi/tests/roundtrip.rs b/native-ffi/tests/roundtrip.rs new file mode 100644 index 0000000..3ec6436 --- /dev/null +++ b/native-ffi/tests/roundtrip.rs @@ -0,0 +1,213 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Exercises the plain-C ABI exactly as a foreign consumer would: call the +//! `df_scan_*` entry points with C structs, hand a caller-allocated +//! `FFI_ArrowArrayStream` across the boundary, then import it back through the +//! Arrow C Stream interface (`ArrowArrayStreamReader`) -- the Rust analogue of +//! arrow-java's `Data.importArrayStream`. No JVM involved.
+ +use std::ffi::{c_char, CStr}; +use std::ptr; + +use datafusion::arrow::array::Int64Array; +use datafusion::arrow::ffi::FFI_ArrowSchema; +use datafusion::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream}; + +use datafusion_scan_ffi::abi::{ + df_error_free, df_scan_abi_version, df_scan_close, df_scan_create, df_scan_execute, + df_scan_execute_partition, df_scan_partition_count, df_scan_schema, DfScanHandle, +}; +use datafusion_scan_ffi::ffi_types::{DfBytes, DfStr}; +use datafusion_scan_ffi::{demo, ABI_VERSION}; + +fn provider() -> DfStr { + DfStr { + ptr: demo::NAME.as_ptr(), + len: demo::NAME.len(), + } +} + +const EMPTY_BYTES: DfBytes = DfBytes { + ptr: ptr::null(), + len: 0, +}; + +/// Pull an err string (if any) for assertions, freeing it. +unsafe fn take_err(err: *mut c_char) -> Option<String> { + if err.is_null() { + None + } else { + let s = CStr::from_ptr(err).to_string_lossy().into_owned(); + df_error_free(err); + Some(s) + } +} + +#[test] +fn abi_version_matches() { + assert_eq!(df_scan_abi_version(), ABI_VERSION); +} + +#[test] +fn schema_probe_returns_provider_schema() { + demo::register(); + let mut out = FFI_ArrowSchema::empty(); + let mut err: *mut c_char = ptr::null_mut(); + let status = + unsafe { df_scan_schema(provider(), EMPTY_BYTES, EMPTY_BYTES, &mut out, &mut err) }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) }); + + let schema = + datafusion::arrow::datatypes::Schema::try_from(&out).expect("import FFI_ArrowSchema"); + let names: Vec<_> = schema.fields().iter().map(|f| f.name().as_str()).collect(); + assert_eq!(names, vec!["id", "name"]); +} + +#[test] +fn unknown_provider_reports_status_and_message() { + let bad = DfStr { + ptr: b"nope".as_ptr(), + len: 4, + }; + let mut out = FFI_ArrowSchema::empty(); + let mut err: *mut c_char = ptr::null_mut(); + let status = unsafe { df_scan_schema(bad, EMPTY_BYTES, EMPTY_BYTES, &mut out, &mut err) }; + assert_eq!(status, 2 /* DF_UNKNOWN_PROVIDER */); + let msg = unsafe {
take_err(err) }.expect("error message"); + assert!(msg.contains("nope"), "msg was: {msg}"); +} + +#[test] +fn create_reports_two_partitions() { + demo::register(); + let handle = create_full_scan(); + let mut count = 0i32; + let mut err: *mut c_char = ptr::null_mut(); + let status = unsafe { df_scan_partition_count(handle, &mut count, &mut err) }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) }); + assert_eq!(count, 2, "demo provider has two partitions"); + unsafe { df_scan_close(handle) }; +} + +#[test] +fn execute_partition_roundtrips_arrow_c_stream() { + demo::register(); + let handle = create_full_scan(); + + // Sum `id` across both partitions by importing each stream back through + // the Arrow C Stream interface, the way a foreign consumer would. + let mut total: i64 = 0; + let mut rows = 0usize; + for partition in 0..2 { + let mut stream = FFI_ArrowArrayStream::empty(); + let mut err: *mut c_char = ptr::null_mut(); + let status = unsafe { df_scan_execute_partition(handle, partition, &mut stream, &mut err) }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) }); + + let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) } + .expect("import FFI_ArrowArrayStream"); + for batch in reader { + let batch = batch.expect("batch"); + rows += batch.num_rows(); + let ids = batch + .column(0) + .as_any() + .downcast_ref::<Int64Array>() + .expect("id column is Int64"); + total += ids.values().iter().sum::<i64>(); + } + } + + assert_eq!(rows, 5, "3 + 2 rows across the two partitions"); + assert_eq!(total, 1 + 2 + 3 + 4 + 5); + unsafe { df_scan_close(handle) }; +} + +#[test] +fn limit_caps_row_count() { + demo::register(); + // demo provider has 5 rows across two partitions; cap at 2.
+ let mut handle: *mut DfScanHandle = ptr::null_mut(); + let mut err: *mut c_char = ptr::null_mut(); + let status = unsafe { + df_scan_create( + provider(), + EMPTY_BYTES, + EMPTY_BYTES, + 0, + 0, + 2, // limit + ptr::null(), + 0, + ptr::null(), + 0, + ptr::null(), + 0, + &mut handle, + &mut err, + ) + }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) }); + + // Read the whole plan; the limit must hold across partitions. + let mut stream = FFI_ArrowArrayStream::empty(); + let mut err2: *mut c_char = ptr::null_mut(); + assert_eq!( + unsafe { df_scan_execute(handle, &mut stream, &mut err2) }, + 0, + "err: {:?}", + unsafe { take_err(err2) } + ); + let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.expect("import"); + let rows: usize = reader.map(|b| b.expect("batch").num_rows()).sum(); + assert_eq!(rows, 2, "limit should cap the scan at 2 rows"); + + unsafe { df_scan_close(handle) }; +} + +#[test] +fn close_is_null_safe() { + unsafe { df_scan_close(ptr::null_mut()) }; +} + +/// Plan a full scan (no projection / filters) over the demo provider. +fn create_full_scan() -> *mut DfScanHandle { + let mut handle: *mut DfScanHandle = ptr::null_mut(); + let mut err: *mut c_char = ptr::null_mut(); + let status = unsafe { + df_scan_create( + provider(), + EMPTY_BYTES, + EMPTY_BYTES, + 0, + 0, + -1, + ptr::null(), + 0, + ptr::null(), + 0, + ptr::null(), + 0, + &mut handle, + &mut err, + ) + }; + assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) }); + assert!(!handle.is_null()); + handle +} diff --git a/native-jni/Cargo.toml b/native-jni/Cargo.toml new file mode 100644 index 0000000..1001bf6 --- /dev/null +++ b/native-jni/Cargo.toml @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-scan-jni" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +publish = false + +[lib] +# The JVM-loaded shim. Thin: it marshals Java args into the in-process scan +# core of `datafusion-scan-ffi` and writes Arrow C Stream / C Schema structs +# into the addresses arrow-java allocated. All Arrow data crosses via the C +# Data interface, not through JNI. +crate-type = ["cdylib"] + +[dependencies] +# The plain-C scan crate, used in-process. `demo-providers` registers the +# in-memory provider alongside `datafusion.listing` for testing. +datafusion-scan-ffi = { path = "../native-ffi", features = ["demo-providers"] } +# Arrow C interface types written into Java-allocated structs. +arrow = { workspace = true } +# Decodes the engine's ScanRequest blob. +prost = { workspace = true } +jni = { workspace = true } diff --git a/native-jni/src/lib.rs b/native-jni/src/lib.rs new file mode 100644 index 0000000..03dd9e8 --- /dev/null +++ b/native-jni/src/lib.rs @@ -0,0 +1,238 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Thin JNI shim over the plain-C scan core (`datafusion-scan-ffi`). +//! +//! This is the JVM's path to the scan ABI. It is deliberately minimal: it +//! marshals Java arguments (a `String` provider name and two `byte[]` blobs) +//! into the in-process scan core, hands back an opaque handle as a `jlong`, +//! and -- for the data plane -- writes a standard `FFI_ArrowArrayStream` (or +//! `FFI_ArrowSchema`) into the address arrow-java allocated. **No Arrow data +//! crosses the JNI boundary**: batches flow through the Arrow C Stream +//! interface, which arrow-java imports with `Data.importArrayStream`. +//! +//! Everything here mirrors `core`'s existing `DataFrame` collect path; the only +//! new ABI is the handful of `Java_org_apache_datafusion_scan_NativeScan_*` +//! entry points below. Non-Java consumers use the `df_scan_*` C symbols +//! exported by `datafusion-scan-ffi` instead; this crate is purely the JVM +//! adapter. + +use std::sync::OnceLock; + +use arrow::ffi::FFI_ArrowSchema; +use arrow::ffi_stream::FFI_ArrowArrayStream; +use datafusion_scan_ffi::proto::ScanRequest as ProtoScanRequest; +use datafusion_scan_ffi::scan::{self, ScanHandle, ScanRequest}; +use datafusion_scan_ffi::{demo, listing}; +use jni::objects::{JByteArray, JClass, JString}; +use jni::sys::{jint, jlong}; +use jni::JNIEnv; +use prost::Message; + +/// Register the in-tree providers exactly once. 
The shim is the registration +/// point for the JVM build; a non-Java embedder registers its own. +fn ensure_registered() { + static INIT: OnceLock<()> = OnceLock::new(); + INIT.get_or_init(|| { + listing::register(); + demo::register(); + }); +} + +/// Run `body`; on `Err`, throw a Java `RuntimeException` and return `default`. +/// Mirrors the project's existing `try_unwrap_or_throw` pattern. +fn try_or_throw<T>( + env: &mut JNIEnv, + default: T, + body: impl FnOnce(&mut JNIEnv) -> Result<T, String>, +) -> T { + match body(env) { + Ok(value) => value, + Err(message) => { + // If throwing fails there is nothing more we can do; the default is + // still returned so we don't leave the stack in a bad state. + let _ = env.throw_new("java/lang/RuntimeException", message); + default + } + } +} + +fn read_bytes(env: &mut JNIEnv, arr: &JByteArray) -> Result<Vec<u8>, String> { + if arr.is_null() { + Ok(Vec::new()) + } else { + env.convert_byte_array(arr).map_err(|e| e.to_string()) + } +} + +fn read_string(env: &mut JNIEnv, s: &JString) -> Result<String, String> { + env.get_string(s).map(Into::into).map_err(|e| e.to_string()) +} + +/// Decode the engine's `ScanRequest` blob into the scan core's request, +/// borrowing the provider name and config bytes. Empty blob -> no pushdown. +fn build_request<'a>( + provider: &'a str, + config: &'a [u8], + scan_request: &[u8], +) -> Result<ScanRequest<'a>, String> { + let req = if scan_request.is_empty() { + ProtoScanRequest::default() + } else { + ProtoScanRequest::decode(scan_request) + .map_err(|e| format!("failed to decode ScanRequest: {e}"))? + }; + Ok(ScanRequest { + provider, + options: config, + partition: &[], + target_partitions: req.target_partitions, + batch_size: req.batch_size, + limit: req.limit.map(|l| l as usize), + config_overrides: req.config_overrides.into_iter().collect(), + projection: req.projection, + filters: req.filters, + }) +} + +/// Probe a provider's output schema, writing an `FFI_ArrowSchema` into the +/// arrow-java-allocated `ArrowSchema` at `schema_addr`.
+#[no_mangle] +pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_providerSchema<'local>( + mut env: JNIEnv<'local>, + _class: JClass<'local>, + provider: JString<'local>, + config: JByteArray<'local>, + schema_addr: jlong, +) { + ensure_registered(); + try_or_throw(&mut env, (), |env| { + if schema_addr == 0 { + return Err("schema address is null".to_string()); + } + let provider = read_string(env, &provider)?; + let config = read_bytes(env, &config)?; + let schema = scan::schema(&provider, &config, &[]).map_err(|e| e.message)?; + let ffi = FFI_ArrowSchema::try_from(schema.as_ref()).map_err(|e| e.to_string())?; + // SAFETY: non-null (checked above); arrow-java allocated an empty ArrowSchema here. + unsafe { std::ptr::write(schema_addr as *mut FFI_ArrowSchema, ffi) }; + Ok(()) + }) +} + +/// Plan a scan. Returns an opaque handle (boxed [`ScanHandle`] pointer) as a +/// `jlong`, or 0 after throwing on error. Release with `closeScan`. +#[no_mangle] +pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_createScan<'local>( + mut env: JNIEnv<'local>, + _class: JClass<'local>, + provider: JString<'local>, + config: JByteArray<'local>, + scan_request: JByteArray<'local>, +) -> jlong { + ensure_registered(); + try_or_throw(&mut env, 0, |env| { + let provider = read_string(env, &provider)?; + let config = read_bytes(env, &config)?; + let scan_request = read_bytes(env, &scan_request)?; + let request = build_request(&provider, &config, &scan_request)?; + let handle = scan::create(request).map_err(|e| e.message)?; + Ok(Box::into_raw(Box::new(handle)) as jlong) // caller owns the box until closeScan + }) +} + +/// Output partition count of a planned scan.
+#[no_mangle] +pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_partitionCount<'local>( + mut env: JNIEnv<'local>, + _class: JClass<'local>, + handle: jlong, +) -> jint { + try_or_throw(&mut env, 0, |_env| { + let count = handle_ref(handle)?.partition_count(); + jint::try_from(count).map_err(|e| e.to_string()) + }) +} + +/// Execute one partition, writing an `FFI_ArrowArrayStream` into the +/// arrow-java-allocated `ArrowArrayStream` at `stream_addr`. +#[no_mangle] +pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_executeStreamPartition<'local>( + mut env: JNIEnv<'local>, + _class: JClass<'local>, + handle: jlong, + partition: jint, + stream_addr: jlong, +) { + try_or_throw(&mut env, (), |_env| { + if partition < 0 { + return Err("partition index is negative".to_string()); + } + let scan = handle_ref(handle)?; + let reader = scan + .execute_partition(partition as usize) + .map_err(|e| e.message)?; + write_stream(stream_addr, FFI_ArrowArrayStream::new(Box::new(reader))) + }) +} + +/// Execute the whole plan as a single coalesced stream. +#[no_mangle] +pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_executeStream<'local>( + mut env: JNIEnv<'local>, + _class: JClass<'local>, + handle: jlong, + stream_addr: jlong, +) { + try_or_throw(&mut env, (), |_env| { + let scan = handle_ref(handle)?; + let reader = scan.execute_all().map_err(|e| e.message)?; + write_stream(stream_addr, FFI_ArrowArrayStream::new(Box::new(reader))) + }) +} + +/// Drop a planned scan. Null-safe; must not race an in-flight execute on the +/// same handle (the Java wrapper enforces this). +#[no_mangle] +pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_closeScan<'local>( + _env: JNIEnv<'local>, + _class: JClass<'local>, + handle: jlong, +) { + if handle != 0 { + // SAFETY: handle came from createScan and is not used afterwards. + drop(unsafe { Box::from_raw(handle as *mut ScanHandle) }); + } +} + +/// Borrow a [`ScanHandle`] from a `jlong`, erroring on null.
+fn handle_ref<'a>(handle: jlong) -> Result<&'a ScanHandle, String> { + if handle == 0 { + return Err("scan handle is null".to_string()); + } + // SAFETY: handle came from createScan and outlives this borrow. + Ok(unsafe { &*(handle as *const ScanHandle) }) +} + +fn write_stream(stream_addr: jlong, ffi: FFI_ArrowArrayStream) -> Result<(), String> { + if stream_addr == 0 { + return Err("stream address is null".to_string()); + } + // SAFETY: arrow-java allocated an empty ArrowArrayStream at this address. + unsafe { std::ptr::write(stream_addr as *mut FFI_ArrowArrayStream, ffi) }; + Ok(()) +} diff --git a/native/Cargo.toml b/native/Cargo.toml index 0362ae6..c040448 100644 --- a/native/Cargo.toml +++ b/native/Cargo.toml @@ -17,14 +17,17 @@ [package] name = "datafusion-jni" -version = "0.1.0" -edition = "2021" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true +# cdylib JNI artifact loaded by the JVM, not a crates.io library. publish = false [lib] # `rlib` alongside `cdylib` so `cargo test` has a Rust-level harness for -# native-only invariants (e.g. error-classification routing through wrapped -# DataFusionError chains). The `cdylib` is still the artifact the JVM loads. +# native-only invariants (the error-classification tests now live in +# `datafusion-jni-common`). The `cdylib` is still the artifact the JVM loads. crate-type = ["cdylib", "rlib"] [features] @@ -75,28 +78,27 @@ runtime-metrics = ["dep:tokio-metrics"] spark = ["dep:datafusion-spark"] [dependencies] -arrow = { version = "58", features = ["ffi"] } -async-trait = "0.1" -datafusion = { version = "53.1.0", features = ["avro"] } -datafusion-proto = "53.1.0" +arrow = { workspace = true } +async-trait = { workspace = true } +datafusion = { workspace = true, features = ["avro"] } +# Shared JNI plumbing (error->exception mapping, runtime singleton, +# StreamingReader). 
`avro` keeps the classifier's AvroError->IoException arm +# in sync with the `avro` feature on `datafusion` above. +datafusion-jni-common = { path = "../native-common", features = ["avro"] } +datafusion-proto = { workspace = true } # Apache Spark-compatible functions + expression planners. Optional and # gated behind the `spark` feature (in the default set). The `core` feature # of the crate is what exposes `SessionStateBuilderSpark`. -datafusion-spark = { version = "53.1.0", features = ["core"], optional = true } -datafusion-substrait = { version = "53.1.0", optional = true } -futures = "0.3" -jni = "0.21" -# Pin to the same major as DataFusion 53.1 pulls in transitively (0.13.x) -# so we share the same `dyn ObjectStore` vtable and don't double-link. -object_store = { version = "0.13", default-features = false } -prost = "0.14" -tokio = { version = "1", features = ["rt-multi-thread"] } -# Tokio runtime metrics. Optional + cfg-gated: this crate's API surface lives -# behind `--cfg tokio_unstable`, so enabling the `runtime-metrics` feature also -# requires the caller to set `RUSTFLAGS="--cfg tokio_unstable"` at build time. 
-tokio-metrics = { version = "0.5", optional = true } -url = "2" +datafusion-spark = { workspace = true, features = ["core"], optional = true } +datafusion-substrait = { workspace = true, optional = true } +futures = { workspace = true } +jni = { workspace = true } +object_store = { workspace = true } +prost = { workspace = true } +tokio = { workspace = true } +tokio-metrics = { workspace = true, optional = true } +url = { workspace = true } [build-dependencies] -prost-build = "0.14" -protoc-bin-vendored = "3" +prost-build = { workspace = true } +protoc-bin-vendored = { workspace = true } diff --git a/native/src/arrow.rs b/native/src/arrow.rs index 2bbe7b0..67e5caf 100644 --- a/native/src/arrow.rs +++ b/native/src/arrow.rs @@ -23,10 +23,10 @@ use jni::sys::jlong; use jni::JNIEnv; use prost::Message; -use crate::errors::{try_unwrap_or_throw, JniResult}; use crate::proto_gen::ArrowReadOptionsProto; use crate::runtime; use crate::schema::decode_optional_schema; +use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult}; fn with_arrow_options( env: &mut JNIEnv, diff --git a/native/src/avro.rs b/native/src/avro.rs index 85d4a07..257ae32 100644 --- a/native/src/avro.rs +++ b/native/src/avro.rs @@ -23,10 +23,10 @@ use jni::sys::jlong; use jni::JNIEnv; use prost::Message; -use crate::errors::{try_unwrap_or_throw, JniResult}; use crate::proto_gen::AvroReadOptionsProto; use crate::runtime; use crate::schema::decode_optional_schema; +use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult}; fn with_avro_options( env: &mut JNIEnv, diff --git a/native/src/cache_manager.rs b/native/src/cache_manager.rs index 3b9e286..ec38dc8 100644 --- a/native/src/cache_manager.rs +++ b/native/src/cache_manager.rs @@ -34,8 +34,8 @@ use datafusion::execution::cache::cache_unit::{ }; use datafusion::execution::cache::DefaultListFilesCache; -use crate::errors::JniResult; use crate::proto_gen::CacheManagerOptionsProto; +use datafusion_jni_common::errors::JniResult; /// Build a 
[`CacheManagerConfig`] from the proto. Returns `Ok(None)` if the /// caller did not set any cache-manager field, so the JNI layer can skip the diff --git a/native/src/csv.rs b/native/src/csv.rs index 3ae4627..b79ed59 100644 --- a/native/src/csv.rs +++ b/native/src/csv.rs @@ -26,12 +26,12 @@ use jni::sys::jlong; use jni::JNIEnv; use prost::Message; -use crate::errors::{try_unwrap_or_throw, JniResult}; use crate::proto_gen::{ CsvReadOptionsProto, CsvWriteOptionsProto, FileCompressionType as ProtoFileCompressionType, }; use crate::runtime; use crate::schema::decode_optional_schema; +use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult}; fn with_csv_options( env: &mut JNIEnv, diff --git a/native/src/json.rs b/native/src/json.rs index 8eea32f..b87be78 100644 --- a/native/src/json.rs +++ b/native/src/json.rs @@ -27,12 +27,12 @@ use jni::sys::jlong; use jni::JNIEnv; use prost::Message; -use crate::errors::{try_unwrap_or_throw, JniResult}; use crate::proto_gen::{ FileCompressionType as ProtoFileCompressionType, JsonWriteOptionsProto, NdJsonReadOptionsProto, }; use crate::runtime; use crate::schema::decode_optional_schema; +use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult}; fn with_json_options( env: &mut JNIEnv, diff --git a/native/src/lib.rs b/native/src/lib.rs index 43161d2..56bef5d 100644 --- a/native/src/lib.rs +++ b/native/src/lib.rs @@ -19,7 +19,6 @@ mod arrow; mod avro; mod cache_manager; mod csv; -mod errors; mod jni_util; mod json; mod memory; @@ -34,16 +33,13 @@ pub(crate) mod proto_gen { include!(concat!(env!("OUT_DIR"), "/datafusion_java.rs")); } -use std::panic::{catch_unwind, AssertUnwindSafe}; use std::path::PathBuf; use std::sync::{Arc, OnceLock}; -use datafusion::arrow::array::RecordBatch; use datafusion::arrow::datatypes::SchemaRef; -use datafusion::arrow::error::ArrowError; use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream; use datafusion::arrow::ipc::writer::StreamWriter; -use 
datafusion::arrow::record_batch::{RecordBatchIterator, RecordBatchReader}; +use datafusion::arrow::record_batch::RecordBatchIterator; use datafusion::common::{JoinType, UnnestOptions}; use datafusion::config::TableParquetOptions; use datafusion::dataframe::DataFrame; @@ -51,11 +47,9 @@ use datafusion::dataframe::DataFrameWriteOptions; use datafusion::error::DataFusionError; use datafusion::execution::disk_manager::{DiskManagerBuilder, DiskManagerMode}; use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder}; -use datafusion::execution::SendableRecordBatchStream; use datafusion::logical_expr::Expr; use datafusion::logical_expr::{col, Partitioning, ScalarUDF, Signature, SortExpr}; use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext}; -use futures::StreamExt; use jni::objects::{JBooleanArray, JByteArray, JClass, JObject, JObjectArray, JString}; use jni::sys::{jboolean, jbyte, jbyteArray, jint, jlong}; use jni::JNIEnv; @@ -63,7 +57,10 @@ use jni::JavaVM; use prost::Message; use tokio::runtime::Runtime; -use crate::errors::{try_unwrap_or_throw, JniResult}; +use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult}; +// Re-exported so sibling modules keep their crate-local `crate::StreamingReader` path. +pub(crate) use datafusion_jni_common::StreamingReader; + use crate::proto_gen::ParquetReadOptionsProto; use crate::proto_gen::SessionOptions; use crate::schema::decode_optional_schema; @@ -84,18 +81,15 @@ pub(crate) fn jvm() -> &'static JavaVM { } pub(crate) fn runtime() -> &'static Runtime { - static RT: OnceLock = OnceLock::new(); - RT.get_or_init(|| { - let rt = Runtime::new().expect("failed to create Tokio runtime"); - // Eagerly install the runtime-metrics accumulator (no-op when the - // `runtime-metrics` Cargo feature is off). 
Initialising here -- not - // lazily on the first `runtimeStats()` call -- means the - // RuntimeMonitor's sampling baseline coincides with runtime start, so - // poll/park/busy totals reflect activity from the first query onward - // rather than from the first observation. - crate::runtime_metrics::init(rt.handle()); - rt - }) + // The singleton itself lives in datafusion-jni-common (shared with the + // datafusion-spark-bridge SDK; each cdylib statically links its own + // copy, so the runtime stays per-library). The init hook eagerly installs the + // runtime-metrics accumulator (no-op when the `runtime-metrics` Cargo + // feature is off). Initialising here -- not lazily on the first + // `runtimeStats()` call -- means the RuntimeMonitor's sampling baseline + // coincides with runtime start, so poll/park/busy totals reflect activity + // from the first query onward rather than from the first observation. + datafusion_jni_common::runtime_with_init(crate::runtime_metrics::init) } /// Wrap the (already-built) `RuntimeEnvBuilder`'s memory pool with a @@ -324,50 +318,6 @@ pub extern "system" fn Java_org_apache_datafusion_DataFrame_collectDataFrame<'lo }) } -/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous -/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore -/// the Java `ArrowReader`) consumes. Each call to `next()` drives one -/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the -/// executor pipeline plus a single in-flight batch. -struct StreamingReader { - schema: SchemaRef, - stream: SendableRecordBatchStream, -} - -impl Iterator for StreamingReader { - type Item = Result; - - fn next(&mut self) -> Option { - // Arrow's C ABI invokes this iterator through FFI_ArrowArrayStream's - // vtable, outside the JNI handler's try_unwrap_or_throw guard. A panic - // here (buggy UDF, arrow cast that panics, runtime poison) would - // unwind across C/FFI -- undefined behaviour. 
Catch it and surface as - // an ArrowError so the Java side sees a normal exception instead. - let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next()))); - match next { - Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))), - Err(panic) => { - let msg = if let Some(s) = panic.downcast_ref::() { - s.clone() - } else if let Some(s) = panic.downcast_ref::<&str>() { - (*s).to_string() - } else { - "rust panic with non-string payload".to_string() - }; - Some(Err(ArrowError::ExternalError( - format!("panic in DataFrame stream: {msg}").into(), - ))) - } - } - } -} - -impl RecordBatchReader for StreamingReader { - fn schema(&self) -> SchemaRef { - self.schema.clone() - } -} - #[no_mangle] pub extern "system" fn Java_org_apache_datafusion_DataFrame_executeStreamDataFrame<'local>( mut env: JNIEnv<'local>, diff --git a/native/src/object_store.rs b/native/src/object_store.rs index eefccf2..985d721 100644 --- a/native/src/object_store.rs +++ b/native/src/object_store.rs @@ -28,9 +28,9 @@ use std::sync::Arc; use datafusion::prelude::SessionContext; use url::Url; -use crate::errors::JniResult; use crate::proto_gen::object_store_registration::Backend; use crate::proto_gen::ObjectStoreRegistration; +use datafusion_jni_common::errors::JniResult; #[cfg(feature = "object-store-gcp")] use crate::proto_gen::GcsOptions; diff --git a/native/src/proto.rs b/native/src/proto.rs index 4f187bc..c1315f9 100644 --- a/native/src/proto.rs +++ b/native/src/proto.rs @@ -28,8 +28,8 @@ use jni::sys::{jbyteArray, jlong}; use jni::JNIEnv; use prost::Message; -use crate::errors::{try_unwrap_or_throw, JniResult}; use crate::runtime; +use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult}; #[no_mangle] pub extern "system" fn Java_org_apache_datafusion_SessionContext_createDataFrameFromProto< diff --git a/native/src/runtime_metrics.rs b/native/src/runtime_metrics.rs index e69410e..dd60dcb 100644 --- a/native/src/runtime_metrics.rs +++ 
b/native/src/runtime_metrics.rs @@ -38,7 +38,7 @@ //! 10 totalOverflowCount #[cfg(not(feature = "runtime-metrics"))] -use crate::errors::JniResult; +use datafusion_jni_common::errors::JniResult; /// Number of i64 values in the snapshot array; kept here so the Java side and /// the feature-off stub agree on the layout. @@ -51,7 +51,7 @@ mod imp { use tokio_metrics::{RuntimeIntervals, RuntimeMonitor}; use super::STATS_FIELD_COUNT; - use crate::errors::JniResult; + use datafusion_jni_common::errors::JniResult; /// `RuntimeMonitor::intervals().next()` returns *delta* metrics covering /// the period since the previous call (or, on the very first call, since @@ -196,7 +196,7 @@ pub fn runtime_stats() -> JniResult<[i64; STATS_FIELD_COUNT]> { Err( "datafusion-jni was built without the `runtime-metrics` Cargo feature; \ rebuild the native crate with \ - `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build --features runtime-metrics` \ + `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build -p datafusion-jni --features runtime-metrics` \ to enable SessionContext.runtimeStats" .into(), ) diff --git a/native/src/schema.rs b/native/src/schema.rs index 968a73a..0c3c7ab 100644 --- a/native/src/schema.rs +++ b/native/src/schema.rs @@ -20,7 +20,7 @@ use datafusion::arrow::ipc::reader::StreamReader; use jni::objects::JByteArray; use jni::JNIEnv; -use crate::errors::JniResult; +use datafusion_jni_common::errors::JniResult; /// Decode an optional Arrow-IPC schema byte array passed in from Java. /// Returns `None` if the byte-array reference is null. diff --git a/pom.xml b/pom.xml index 6210841..a48be6c 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,7 @@ under the License. core examples + spark @@ -95,6 +96,11 @@ under the License. + + org.apache.maven.plugins + maven-compiler-plugin + 3.13.0 + org.apache.maven.plugins maven-surefire-plugin @@ -173,10 +179,10 @@ under the License. 
.mvn/** **/target/** - native/target/** + rust-target/** tpch-data/** - - native/Cargo.lock + + Cargo.lock dev/release/rat_exclude_files.txt diff --git a/proto/scan_config.proto b/proto/scan_config.proto new file mode 100644 index 0000000..43593bf --- /dev/null +++ b/proto/scan_config.proto @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +syntax = "proto3"; + +package datafusion_java; + +import "csv_read_options.proto"; +import "json_read_options.proto"; +import "parquet_read_options.proto"; +import "avro_read_options.proto"; +import "arrow_read_options.proto"; + +option java_package = "org.apache.datafusion.protobuf"; +option java_multiple_files = true; + +// Provider configuration carried in the `options` byte blob of the plain-C +// scan ABI (`df_scan_schema` / `df_scan_create`). The ABI itself treats these +// bytes as opaque; the registered provider builder named by `provider` decodes +// them. This message is the encoding the in-tree builders agree on -- a custom +// builder may ignore it and define its own. +// +// `provider` selects the registered builder (e.g. "datafusion.listing", +// "datafusion.memory"). 
`source` carries that builder's parameters; `custom` +// is an escape hatch for builders that define their own wire format. +message ScanConfig { + string provider = 1; + + oneof source { + ListingSource listing = 2; + bytes custom = 15; + } +} + +// A file-backed listing source: one or more paths/URIs read with a single +// file format. Mirrors DataFusion's ListingTable inputs. Object-store +// credentials/endpoints are configured out of band (registered on the context +// by the embedding cdylib), not here. +message ListingSource { + // Files or directories. Globs and object-store URIs (s3://, gs://, ...) are + // allowed where the registered object store supports them. + repeated string paths = 1; + + // The file format and its read options. Reuses the existing per-format + // option messages so encoders are shared with the rest of the binding. + oneof format { + CsvReadOptionsProto csv = 2; + NdJsonReadOptionsProto json = 3; + ParquetReadOptionsProto parquet = 4; + AvroReadOptionsProto avro = 5; + ArrowReadOptionsProto arrow = 6; + } + + // Optional explicit schema as Arrow IPC schema-message bytes. Unset lets the + // provider infer it (e.g. from Parquet metadata or by sampling). + optional bytes schema_ipc = 7; +} + +// Per-partition slice descriptor carried in the `partition` byte blob. Empty +// for a whole-table scan. `index` lets the driver hand each executor task its +// slice; `opaque` is builder-defined (e.g. a serialized file group), letting a +// provider partition however it likes without the ABI knowing the shape. +message ScanPartition { + uint32 index = 1; + bytes opaque = 2; +} diff --git a/proto/scan_request.proto b/proto/scan_request.proto new file mode 100644 index 0000000..1770ee1 --- /dev/null +++ b/proto/scan_request.proto @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +syntax = "proto3"; + +package datafusion_java; + +option java_package = "org.apache.datafusion.protobuf"; +option java_multiple_files = true; + +// The pushdown a query engine (Spark DataSourceV2, etc.) captures for a scan. +// +// This is the *staging* object the engine populates during planning. It maps +// onto the typed arguments of `df_scan_create` rather than being passed as a +// single blob: the JNI shim / FFM layer decodes a ScanRequest and explodes it +// into the call's `projection` / `filters` / `target_partitions` / ... +// arguments. Keeping the C ABI's arguments typed (not one opaque protobuf) +// keeps it FFM-friendly and language-neutral; this message just gives the +// engine one structured thing to build and serialize across its own layers +// (e.g. driver -> executor task) before the shim makes the native call. +// +// It is deliberately NOT the provider config: which provider and its +// parameters live in ScanConfig (the `options` blob). A ScanRequest is purely +// "given that provider, here is what to read." +message ScanRequest { + // Pruned columns to project, by name. Empty selects all columns. Names + // match the provider's (pre-widening) output schema. 
+ repeated string projection = 1; + + // Pushed filters, each a serialized `datafusion.LogicalExprNode` (the same + // encoding `datafusion-ffi` uses). The engine translates whichever of its + // own predicates it can express and leaves the rest for itself to apply. + // The provider receives them as a conjunction (AND). + repeated bytes filters = 2; + + // Optional row limit pushed into the scan. Unset means no limit. Advisory: + // the engine must still enforce its own limit, since not every plan honors + // it exactly. + optional uint64 limit = 3; + + // Execution tuning resolved once on the driver and shipped to every executor + // so partition counts stay deterministic. <= 0 leaves the DataFusion + // default in place (matches the C ABI's convention). + int32 target_partitions = 4; + int32 batch_size = 5; + + // Session config overrides applied to the scan's private context, e.g. + // {"datafusion.execution.parquet.pushdown_filters": "true"}. Resolved on the + // driver alongside the tuning above. + map config_overrides = 6; +} diff --git a/spark/pom.xml b/spark/pom.xml new file mode 100644 index 0000000..26af4f1 --- /dev/null +++ b/spark/pom.xml @@ -0,0 +1,116 @@ + + + + 4.0.0 + + + org.apache.datafusion + datafusion-java-parent + 0.2.0-SNAPSHOT + + + datafusion-spark + DataFusion Spark DataSource + A Spark DataSourceV2 backed by a DataFusion TableProvider via the plain-C scan ABI. 
+ + + 4.0.0 + 2.13 + + 18.1.0 + + + + + + org.apache.datafusion + datafusion-java + ${project.version} + + + org.apache.arrow + * + + + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + provided + + + + + org.apache.arrow + arrow-c-data + ${spark.arrow.version} + provided + + + + + org.junit.jupiter + junit-jupiter + test + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + + + -Djava.library.path=${maven.multiModuleProjectDirectory}/rust-target/debug + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED + --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + + + + + + diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java new file mode 100644 index 0000000..7dbb27b --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.spark; + +import java.io.IOException; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.datafusion.scan.DatafusionScan; +import org.apache.spark.sql.connector.read.PartitionReader; +import org.apache.spark.sql.vectorized.ArrowColumnVector; +import org.apache.spark.sql.vectorized.ColumnVector; +import org.apache.spark.sql.vectorized.ColumnarBatch; + +/** + * Reads one scan partition as Spark {@link ColumnarBatch}es, zero-copy. + * + *

The Arrow vectors imported from the native stream are wrapped directly in Spark {@link + * ArrowColumnVector}s -- no per-cell copy. This requires the executor JVM to have a single + * arrow-java (the cluster's Spark Arrow); the connector compiles against that version and never + * bundles its own, so our import and Spark's {@code ArrowColumnVector} share the same classes. + * + *

Lifecycle: the underlying Arrow vectors are owned by the {@link ArrowReader}. We do not close
+ * the {@link ColumnarBatch} (which would close those vectors a second time); {@link #close()}
+ * closes the reader -- freeing the vectors once -- then the scan, and finally the allocator.
+ */
+final class DatafusionColumnarPartitionReader implements PartitionReader<ColumnarBatch> {
+
+  private final BufferAllocator allocator;
+  private final DatafusionScan scan;
+  private final ArrowReader reader;
+  private final VectorSchemaRoot root;
+  private final ColumnarBatch batch;
+
+  /**
+   * Plans the scan described by {@code partition}, starts the requested partition and wraps the
+   * reader's reusable root in a single {@link ColumnarBatch}.
+   *
+   * <p>If any step fails, everything opened so far (reader, scan, allocator) is released before
+   * the exception propagates, so a failed task does not leak the native scan handle or Arrow
+   * buffers.
+   *
+   * @param partition the serialized scan slice to execute
+   * @throws RuntimeException if the scan cannot be opened; an {@link IOException} is wrapped as
+   *     the cause
+   */
+  DatafusionColumnarPartitionReader(DatafusionInputPartition partition) {
+    BufferAllocator openedAllocator = new RootAllocator();
+    // Tracked in locals (not the final fields) so the catch blocks can see what was opened.
+    DatafusionScan openedScan = null;
+    ArrowReader openedReader = null;
+    VectorSchemaRoot openedRoot;
+    ColumnarBatch openedBatch;
+    try {
+      openedScan =
+          DatafusionScan.create(partition.provider, partition.config, partition.scanRequest);
+      openedReader = openedScan.executePartition(openedAllocator, partition.index);
+      openedRoot = openedReader.getVectorSchemaRoot();
+      openedBatch = new ColumnarBatch(wrap(openedRoot));
+    } catch (IOException e) {
+      RuntimeException failure =
+          new RuntimeException("failed to open scan partition " + partition.index, e);
+      releaseAfterFailure(failure, openedReader, openedScan, openedAllocator);
+      throw failure;
+    } catch (RuntimeException e) {
+      releaseAfterFailure(e, openedReader, openedScan, openedAllocator);
+      throw e;
+    }
+    this.allocator = openedAllocator;
+    this.scan = openedScan;
+    this.reader = openedReader;
+    this.root = openedRoot;
+    this.batch = openedBatch;
+  }
+
+  /**
+   * Best-effort cleanup for a half-constructed reader, in the same order as {@link #close()}:
+   * reader, scan, allocator. A failure while closing is attached to {@code failure} as a
+   * suppressed exception instead of replacing it, so the root cause stays visible.
+   */
+  private static void releaseAfterFailure(
+      Throwable failure, ArrowReader reader, DatafusionScan scan, BufferAllocator allocator) {
+    if (reader != null) {
+      try {
+        reader.close();
+      } catch (Exception suppressed) {
+        failure.addSuppressed(suppressed);
+      }
+    }
+    if (scan != null) {
+      try {
+        scan.close();
+      } catch (Exception suppressed) {
+        failure.addSuppressed(suppressed);
+      }
+    }
+    try {
+      allocator.close();
+    } catch (Exception suppressed) {
+      failure.addSuppressed(suppressed);
+    }
+  }
+
+  /** Wrap each Arrow vector of the (reused) root as a Spark column vector, once. */
+  private static ColumnVector[] wrap(VectorSchemaRoot root) {
+    ColumnVector[] columns = new ColumnVector[root.getFieldVectors().size()];
+    int i = 0;
+    for (FieldVector vector : root.getFieldVectors()) {
+      columns[i++] = new ArrowColumnVector(vector);
+    }
+    return columns;
+  }
+
+  /**
+   * Advances to the next non-empty batch.
+   *
+   * @return {@code true} if {@link #get()} now exposes a batch with at least one row, {@code
+   *     false} once the stream is exhausted
+   * @throws IOException if loading the next batch fails
+   */
+  @Override
+  public boolean next() throws IOException {
+    // The root's vectors are reloaded in place each batch; skip empty batches.
+    while (reader.loadNextBatch()) {
+      int rows = root.getRowCount();
+      if (rows > 0) {
+        batch.setNumRows(rows);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /** Returns the single reused batch; its contents change on every successful {@link #next()}. */
+  @Override
+  public ColumnarBatch get() {
+    return batch;
+  }
+
+  /**
+   * Closes the reader, then the scan, then the allocator. Each step runs even if an earlier one
+   * throws.
+   */
+  @Override
+  public void close() throws IOException {
+    try {
+      reader.close();
+    } finally {
+      try {
+        scan.close();
+      } finally {
+        allocator.close();
+      }
+    }
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java
new file mode 100644
index 0000000..8152aad
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.spark.sql.connector.read.InputPartition;
+
+/**
+ * A serializable slice of a scan shipped to an executor. Carries only bytes and an index -- never a
+ * native handle, which would be meaningless in another process. The executor rebuilds the provider
+ * from {@code config} and runs partition {@code index}.
+ */ +final class DatafusionInputPartition implements InputPartition { + + private static final long serialVersionUID = 1L; + + final String provider; + final byte[] config; + final byte[] scanRequest; + final int index; + + DatafusionInputPartition(String provider, byte[] config, byte[] scanRequest, int index) { + this.provider = provider; + this.config = config; + this.scanRequest = scanRequest; + this.index = index; + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java new file mode 100644 index 0000000..2442eb2 --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.spark; + +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.PartitionReader; +import org.apache.spark.sql.connector.read.PartitionReaderFactory; +import org.apache.spark.sql.vectorized.ColumnarBatch; + +/** + * Creates a columnar reader per partition. 
Serialized to executors, so it holds no state. + * + *

Reads are columnar: {@link #supportColumnarReads} returns true, so Spark calls {@link + * #createColumnarReader} and consumes Arrow buffers directly via {@link + * DatafusionColumnarPartitionReader}. The row reader is unsupported. + */ +final class DatafusionPartitionReaderFactory implements PartitionReaderFactory { + + private static final long serialVersionUID = 1L; + + @Override + public boolean supportColumnarReads(InputPartition partition) { + return true; + } + + @Override + public PartitionReader createColumnarReader(InputPartition partition) { + return new DatafusionColumnarPartitionReader((DatafusionInputPartition) partition); + } + + @Override + public PartitionReader createReader(InputPartition partition) { + throw new UnsupportedOperationException("datafusion source reads are columnar"); + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java new file mode 100644 index 0000000..9cafd37 --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datafusion.spark; + +import java.util.List; + +import org.apache.datafusion.protobuf.ScanRequest; +import org.apache.spark.sql.connector.read.Scan; +import org.apache.spark.sql.connector.read.ScanBuilder; +import org.apache.spark.sql.connector.read.SupportsPushDownFilters; +import org.apache.spark.sql.connector.read.SupportsPushDownLimit; +import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns; +import org.apache.spark.sql.sources.Filter; +import org.apache.spark.sql.types.StructType; + +import com.google.protobuf.ByteString; + +/** + * Captures Spark's projection, filter, and limit pushdown, encoding them into the {@code + * ScanRequest} the scan ABI consumes. + */ +final class DatafusionScanBuilder + implements ScanBuilder, + SupportsPushDownRequiredColumns, + SupportsPushDownFilters, + SupportsPushDownLimit { + + private final String provider; + private final byte[] config; + + private StructType requiredSchema; + private Filter[] pushedFilters = new Filter[0]; + private List<byte[]> pushedFilterBytes = List.of(); + private int limit = -1; + + DatafusionScanBuilder(StructType fullSchema, String provider, byte[] config) { + this.provider = provider; + this.config = config; + this.requiredSchema = fullSchema; + } + + @Override + public void pruneColumns(StructType requiredSchema) { + this.requiredSchema = requiredSchema; + } + + @Override + public Filter[] pushFilters(Filter[] filters) { + SparkFilters.Result result = SparkFilters.split(filters); + this.pushedFilters = result.pushedFilters(); + this.pushedFilterBytes = result.pushed(); + return result.postScan(); + } + + @Override + public Filter[] pushedFilters() { + return pushedFilters; + } + + @Override + public boolean pushLimit(int limit) { + // DataFusion enforces the limit exactly (df.limit after filters), and a + // limited plan coalesces to a single output partition, so the total row + // count is bounded. Returning true reports the limit as pushed; Spark still re-applies it while isPartiallyPushed() keeps its default of true. 
+ this.limit = limit; + return true; + } + + @Override + public Scan build() { + ScanRequest.Builder request = ScanRequest.newBuilder(); + for (String name : requiredSchema.fieldNames()) { + request.addProjection(name); + } + for (byte[] filter : pushedFilterBytes) { + request.addFilters(ByteString.copyFrom(filter)); + } + if (limit >= 0) { + request.setLimit(limit); + } + return new DatafusionScanImpl(provider, config, request.build().toByteArray(), requiredSchema); + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java new file mode 100644 index 0000000..3a48fba --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datafusion.spark; + +import org.apache.datafusion.scan.DatafusionScan; +import org.apache.spark.sql.connector.read.Batch; +import org.apache.spark.sql.connector.read.InputPartition; +import org.apache.spark.sql.connector.read.PartitionReaderFactory; +import org.apache.spark.sql.connector.read.Scan; +import org.apache.spark.sql.types.StructType; + +/** + * A planned DataFusion scan as a Spark {@link Scan}/{@link Batch}. + * + * <p>{@link #planInputPartitions()} runs on the driver: it plans once to learn the partition count, + * then emits one serializable {@link DatafusionInputPartition} per partition carrying the config + + * request bytes (never a native handle). Each executor rebuilds and runs its own partition. + */ +final class DatafusionScanImpl implements Scan, Batch { + + private final String provider; + private final byte[] config; + private final byte[] scanRequest; + private final StructType readSchema; + + DatafusionScanImpl(String provider, byte[] config, byte[] scanRequest, StructType readSchema) { + this.provider = provider; + this.config = config; + this.scanRequest = scanRequest; + this.readSchema = readSchema; + } + + /** The encoded ScanRequest bytes. Package-private for pushdown unit tests. */ + byte[] scanRequestBytes() { + return scanRequest; + } + + @Override + public StructType readSchema() { + return readSchema; + } + + @Override + public Batch toBatch() { + return this; + } + + @Override + public InputPartition[] planInputPartitions() { + int partitions; + try (DatafusionScan scan = DatafusionScan.create(provider, config, scanRequest)) { + partitions = scan.partitionCount(); + } + InputPartition[] result = new InputPartition[partitions]; + for (int i = 0; i < partitions; i++) { + result[i] = new DatafusionInputPartition(provider, config, scanRequest, i); + } + return result; + } + + @Override + public PartitionReaderFactory createReaderFactory() { + return new DatafusionPartitionReaderFactory(); + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java new file mode 100644 index 0000000..d2e8f9d --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.spark; + +import java.util.EnumSet; +import java.util.Set; + +import org.apache.spark.sql.connector.catalog.SupportsRead; +import org.apache.spark.sql.connector.catalog.TableCapability; +import org.apache.spark.sql.connector.read.ScanBuilder; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** A readable table over a DataFusion provider; produces {@link DatafusionScanBuilder}s. 
*/ +final class DatafusionTable implements SupportsRead { + + private final StructType schema; + private final String provider; + private final byte[] config; + + DatafusionTable(StructType schema, String provider, byte[] config) { + this.schema = schema; + this.provider = provider; + this.config = config; + } + + @Override + public String name() { + return "datafusion"; + } + + @Override + public StructType schema() { + return schema; + } + + @Override + public Set<TableCapability> capabilities() { + return EnumSet.of(TableCapability.BATCH_READ); + } + + @Override + public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) { + return new DatafusionScanBuilder(schema, provider, config); + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java new file mode 100644 index 0000000..5d837d5 --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datafusion.spark; + +import java.util.Map; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.datafusion.scan.DatafusionScan; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableProvider; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.sources.DataSourceRegister; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * Entry point for the {@code datafusion} Spark data source. + * + * <p>Registered via {@code DataSourceRegister} so {@code + * spark.read.format("datafusion").option("path", ...).load()} resolves here. Options are decoded + * into a {@code ScanConfig} ({@link OptionsCodec}); the schema is probed once, on the driver, + * through {@link DatafusionScan#schema}. + */ +public final class DatafusionTableProvider implements TableProvider, DataSourceRegister { + + @Override + public String shortName() { + return "datafusion"; + } + + @Override + public StructType inferSchema(CaseInsensitiveStringMap options) { + OptionsCodec.Source source = OptionsCodec.fromOptions(options); + try (BufferAllocator allocator = new RootAllocator()) { + Schema arrow = DatafusionScan.schema(allocator, source.provider(), source.config()); + return SchemaConverter.toSparkSchema(arrow); + } + } + + @Override + public Table getTable( + StructType schema, Transform[] partitioning, Map<String, String> properties) { + OptionsCodec.Source source = OptionsCodec.fromOptions(new CaseInsensitiveStringMap(properties)); + return new DatafusionTable(schema, source.provider(), source.config()); + } + + @Override + public boolean supportsExternalMetadata() { + return false; + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java b/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java new file mode 100644 index 0000000..7aaed5c --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.spark; + +import java.util.Locale; + +import org.apache.datafusion.protobuf.CsvReadOptionsProto; +import org.apache.datafusion.protobuf.ListingSource; +import org.apache.datafusion.protobuf.NdJsonReadOptionsProto; +import org.apache.datafusion.protobuf.ParquetReadOptionsProto; +import org.apache.datafusion.protobuf.ScanConfig; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * Translates Spark data-source options into a {@code ScanConfig} for the {@code datafusion.listing} + * provider. + * + * <p>Recognized options: {@code path} (required), {@code format} ({@code csv|parquet|json}, default + * inferred from the path extension then {@code csv}), and for CSV {@code header} (default true) and + * {@code delimiter} (default {@code ,}). + */ +final class OptionsCodec { + + static final String PROVIDER = "datafusion.listing"; + + private OptionsCodec() {} + + /** The provider name plus the serialized ScanConfig the listing builder decodes. */ + record Source(String provider, byte[] config) {} + + static Source fromOptions(CaseInsensitiveStringMap options) { + String path = options.get("path"); + if (path == null || path.isEmpty()) { + throw new IllegalArgumentException("the 'datafusion' source requires a 'path' option"); + } + String format = options.containsKey("format") ? options.get("format") : inferFormat(path); + + ListingSource.Builder listing = ListingSource.newBuilder().addPaths(path); + switch (format.toLowerCase(Locale.ROOT)) { + case "csv" -> + listing.setCsv( + CsvReadOptionsProto.newBuilder() + .setHasHeader(options.getBoolean("header", true)) + .setDelimiter(delimiter(options)) + .setQuote('"') + .setFileExtension(".csv") + .build()); + case "parquet" -> + listing.setParquet( + ParquetReadOptionsProto.newBuilder().setFileExtension(".parquet").build()); + case "json" -> + listing.setJson(NdJsonReadOptionsProto.newBuilder().setFileExtension(".json").build()); + default -> throw new IllegalArgumentException("unsupported format: " + format); + } + + byte[] config = + ScanConfig.newBuilder() + .setProvider(PROVIDER) + .setListing(listing.build()) + .build() + .toByteArray(); + return new Source(PROVIDER, config); + } + + private static int delimiter(CaseInsensitiveStringMap options) { + String d = options.containsKey("delimiter") ? 
options.get("delimiter") : ","; + if (d.length() != 1) { + throw new IllegalArgumentException("delimiter must be a single character, got: " + d); + } + return d.charAt(0); + } + + private static String inferFormat(String path) { + String lower = path.toLowerCase(Locale.ROOT); + if (lower.endsWith(".parquet")) { + return "parquet"; + } + if (lower.endsWith(".json")) { + return "json"; + } + return "csv"; + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java b/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java new file mode 100644 index 0000000..d61d9c4 --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.datafusion.spark; + +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; + +/** + * Converts an Arrow schema (produced by the scan ABI) into a Spark {@link StructType}. + * + * <p>Done directly rather than through Spark's {@code ArrowUtils} so the connector depends only on + * our Arrow version, never Spark's bundled one. Covers the primitive types the row reader produces; + * unsupported types fail fast. + */ +final class SchemaConverter { + + private SchemaConverter() {} + + static StructType toSparkSchema(Schema arrowSchema) { + StructType struct = new StructType(); + for (Field field : arrowSchema.getFields()) { + struct = struct.add(field.getName(), toSparkType(field), field.isNullable()); + } + return struct; + } + + static DataType toSparkType(Field field) { + ArrowType type = field.getType(); + if (type instanceof ArrowType.Int i) { + if (!i.getIsSigned()) { + throw unsupported(field); + } + return switch (i.getBitWidth()) { + case 8 -> DataTypes.ByteType; + case 16 -> DataTypes.ShortType; + case 32 -> DataTypes.IntegerType; + case 64 -> DataTypes.LongType; + default -> throw unsupported(field); + }; + } + if (type instanceof ArrowType.FloatingPoint fp) { + return fp.getPrecision() == FloatingPointPrecision.DOUBLE + ? DataTypes.DoubleType + : DataTypes.FloatType; + } + if (type instanceof ArrowType.Utf8 || type instanceof ArrowType.LargeUtf8) { + return DataTypes.StringType; + } + if (type instanceof ArrowType.Bool) { + return DataTypes.BooleanType; + } + throw unsupported(field); + } + + private static IllegalArgumentException unsupported(Field field) { + return new IllegalArgumentException( + "unsupported Arrow type for column '" + field.getName() + "': " + field.getType()); + } +} diff --git a/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java b/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java new file mode 100644 index 0000000..39be5c6 --- /dev/null +++ b/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.spark; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.datafusion.protobuf.BinaryExprNode; +import org.apache.datafusion.protobuf.IsNotNull; +import org.apache.datafusion.protobuf.IsNull; +import org.apache.datafusion.protobuf.LogicalExprNode; +import org.apache.datafusion.protobuf.Not; +import org.apache.spark.sql.sources.And; +import org.apache.spark.sql.sources.EqualTo; +import org.apache.spark.sql.sources.Filter; +import org.apache.spark.sql.sources.GreaterThan; +import org.apache.spark.sql.sources.GreaterThanOrEqual; +import org.apache.spark.sql.sources.LessThan; +import org.apache.spark.sql.sources.LessThanOrEqual; +import org.apache.spark.sql.sources.Or; + +import datafusion_common.DatafusionCommon.Column; +import datafusion_common.DatafusionCommon.ScalarValue; + +/** + * Translates Spark {@link Filter}s into serialized {@code datafusion.LogicalExprNode} bytes for + * filter pushdown. + * + * <p>Translates the comparison, boolean, and null predicates over primitive literals that map + * cleanly; anything else is reported as not pushed so Spark applies it itself. A translated filter + * is applied exactly by DataFusion (the scan core calls {@code DataFrame::filter}), so it is safe + * to treat it as fully handled. + */ +final class SparkFilters { + + private SparkFilters() {} + + /** Pushed filter bytes, and the filters Spark must still apply itself. */ + record Result(List<byte[]> pushed, Filter[] pushedFilters, Filter[] postScan) {} + + static Result split(Filter[] filters) { + List<byte[]> pushed = new ArrayList<>(); + List<Filter> pushedFilters = new ArrayList<>(); + List<Filter> postScan = new ArrayList<>(); + for (Filter filter : filters) { + LogicalExprNode expr = translate(filter); + if (expr != null) { + pushed.add(expr.toByteArray()); + pushedFilters.add(filter); + } else { + postScan.add(filter); + } + } + return new Result( + pushed, pushedFilters.toArray(new Filter[0]), postScan.toArray(new Filter[0])); + } + + /** Translate a single filter, or return null if it cannot be expressed. 
*/ + private static LogicalExprNode translate(Filter filter) { + if (filter instanceof EqualTo f) { + return binary("Eq", f.attribute(), f.value()); + } + if (filter instanceof GreaterThan f) { + return binary("Gt", f.attribute(), f.value()); + } + if (filter instanceof GreaterThanOrEqual f) { + return binary("GtEq", f.attribute(), f.value()); + } + if (filter instanceof LessThan f) { + return binary("Lt", f.attribute(), f.value()); + } + if (filter instanceof LessThanOrEqual f) { + return binary("LtEq", f.attribute(), f.value()); + } + if (filter instanceof org.apache.spark.sql.sources.IsNull f) { + return wrap(b -> b.setIsNullExpr(IsNull.newBuilder().setExpr(column(f.attribute())))); + } + if (filter instanceof org.apache.spark.sql.sources.IsNotNull f) { + return wrap(b -> b.setIsNotNullExpr(IsNotNull.newBuilder().setExpr(column(f.attribute())))); + } + if (filter instanceof And f) { + LogicalExprNode l = translate(f.left()); + LogicalExprNode r = translate(f.right()); + return (l == null || r == null) ? null : binaryNodes("And", l, r); + } + if (filter instanceof Or f) { + LogicalExprNode l = translate(f.left()); + LogicalExprNode r = translate(f.right()); + return (l == null || r == null) ? null : binaryNodes("Or", l, r); + } + if (filter instanceof org.apache.spark.sql.sources.Not f) { + LogicalExprNode child = translate(f.child()); + return child == null ? 
null : wrap(b -> b.setNotExpr(Not.newBuilder().setExpr(child))); + } + return null; + } + + private static LogicalExprNode binary(String op, String attribute, Object value) { + ScalarValue literal = scalar(value); + if (literal == null) { + return null; + } + return binaryNodes( + op, column(attribute), LogicalExprNode.newBuilder().setLiteral(literal).build()); + } + + private static LogicalExprNode binaryNodes( + String op, LogicalExprNode left, LogicalExprNode right) { + return LogicalExprNode.newBuilder() + .setBinaryExpr(BinaryExprNode.newBuilder().addOperands(left).addOperands(right).setOp(op)) + .build(); + } + + private static LogicalExprNode column(String attribute) { + return LogicalExprNode.newBuilder().setColumn(Column.newBuilder().setName(attribute)).build(); + } + + private interface ExprFiller { + LogicalExprNode.Builder apply(LogicalExprNode.Builder builder); + } + + private static LogicalExprNode wrap(ExprFiller filler) { + return filler.apply(LogicalExprNode.newBuilder()).build(); + } + + /** Map a Spark literal to a DataFusion ScalarValue, or null if unsupported. 
+ */ + private static ScalarValue scalar(Object value) { + if (value instanceof Long v) { + return ScalarValue.newBuilder().setInt64Value(v).build(); + } + if (value instanceof Integer v) { + return ScalarValue.newBuilder().setInt32Value(v).build(); + } + if (value instanceof Double v) { + return ScalarValue.newBuilder().setFloat64Value(v).build(); + } + if (value instanceof Float v) { + return ScalarValue.newBuilder().setFloat32Value(v).build(); + } + if (value instanceof Boolean v) { + return ScalarValue.newBuilder().setBoolValue(v).build(); + } + if (value instanceof String v) { + return ScalarValue.newBuilder().setUtf8Value(v).build(); + } + return null; + } +} diff --git a/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister new file mode 100644 index 0000000..fd603b1 --- /dev/null +++ b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -0,0 +1 @@ +org.apache.datafusion.spark.DatafusionTableProvider diff --git a/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java b/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java new file mode 100644 index 0000000..b1695ba --- /dev/null +++ b/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.spark; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.List; + +import org.apache.datafusion.protobuf.ScanRequest; +import org.apache.spark.sql.connector.read.SupportsPushDownFilters; +import org.apache.spark.sql.sources.Filter; +import org.apache.spark.sql.sources.GreaterThanOrEqual; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.Test; + +/** + * Unit-level proof that the scan builder encodes pushdown into the ScanRequest, isolated from + * Spark's own limit/filter handling (which would mask whether we pushed anything). Each test decodes the built scan's request bytes and checks its limit, projection, or filter fields. 
+ */ +class DatafusionScanBuilderTest { + + private static final StructType SCHEMA = + new StructType().add("id", DataTypes.LongType).add("name", DataTypes.StringType); + + private DatafusionScanBuilder builder() { + return new DatafusionScanBuilder(SCHEMA, "datafusion.listing", new byte[0]); + } + + private static ScanRequest decode(org.apache.spark.sql.connector.read.Scan scan) + throws Exception { + return ScanRequest.parseFrom(((DatafusionScanImpl) scan).scanRequestBytes()); + } + + @Test + void pushesLimit() throws Exception { + DatafusionScanBuilder b = builder(); + assertTrue(b.pushLimit(7), "limit should be reported as fully pushed"); + ScanRequest request = decode(b.build()); + assertTrue(request.hasLimit()); + assertEquals(7L, request.getLimit()); + } + + @Test + void noLimitWhenNotPushed() throws Exception { + ScanRequest request = decode(builder().build()); + assertFalse(request.hasLimit(), "limit must be unset when Spark pushes none"); + } + + @Test + void pushesProjection() throws Exception { + DatafusionScanBuilder b = builder(); + b.pruneColumns(new StructType().add("name", DataTypes.StringType)); + ScanRequest request = decode(b.build()); + assertEquals(List.of("name"), request.getProjectionList()); + } + + @Test + void pushesComparisonFilter() throws Exception { + DatafusionScanBuilder b = builder(); + Filter[] residual = + ((SupportsPushDownFilters) b).pushFilters(new Filter[] {new GreaterThanOrEqual("id", 2L)}); + assertEquals(0, residual.length, "a translatable filter should be fully pushed"); + ScanRequest request = decode(b.build()); + assertEquals(1, request.getFiltersCount()); + } +} diff --git a/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java b/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java new file mode 100644 index 0000000..4165921 --- /dev/null +++ b/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datafusion.spark; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; + +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.functions; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * End-to-end test of the {@code datafusion} Spark data source against a local SparkSession: the + * connector reads a CSV through the DataFusion listing provider and the plain-C scan ABI, all the + * way back to Spark rows. Covers schema inference, full scan, projection, and filter pushdown. 
+ */ +class DatafusionSourceTest { + + private static SparkSession spark; + + @TempDir static Path tmp; + + @BeforeAll + static void startSpark() { + spark = + SparkSession.builder() + .master("local[2]") + .appName("datafusion-source-test") + .config("spark.ui.enabled", "false") + .config("spark.sql.shuffle.partitions", "2") + .getOrCreate(); + } + + @AfterAll + static void stopSpark() { + if (spark != null) { + spark.stop(); + } + } + + private Dataset<Row> read() throws Exception { + Path csv = tmp.resolve("data.csv"); + Files.writeString(csv, "id,name\n1,a\n2,b\n3,c\n"); + return spark + .read() + .format("datafusion") + .option("path", csv.toString()) + .option("format", "csv") + .load(); + } + + @Test + void inferredSchema() throws Exception { + List<String> columns = Arrays.asList(read().schema().fieldNames()); + assertEquals(List.of("id", "name"), columns); + } + + @Test + void fullScanReturnsAllRows() throws Exception { + assertEquals(3, read().count()); + } + + @Test + void projectionSelectsColumns() throws Exception { + Dataset<Row> names = read().select("name"); + assertEquals(List.of("name"), Arrays.asList(names.schema().fieldNames())); + assertEquals(3, names.count()); + } + + @Test + void limitPushdownCapsRows() throws Exception { + assertEquals(2, read().limit(2).count()); + } + + @Test + void filterPushdownReducesRows() throws Exception { + Dataset<Row> filtered = read().filter(functions.col("id").geq(2)); + assertEquals(2, filtered.count()); + + List<Long> ids = filtered.select("id").as(org.apache.spark.sql.Encoders.LONG()).collectAsList(); + assertTrue(ids.stream().allMatch(id -> id >= 2), "all surviving ids should be >= 2"); + assertEquals(2L + 3L, ids.stream().mapToLong(Long::longValue).sum()); + } +}