diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 0000000..d7e0ee2
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Send Cargo's workspace output to `rust-target/` rather than the default
+# `target/`: `mvn clean` deletes the root `target/` and the Rust cache with it.
+[build]
+target-dir = "rust-target"
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c5db936..da8e65a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -83,8 +83,8 @@ jobs:
           path: |
             ~/.cargo/registry
             ~/.cargo/git
-            native/target
-          key: ${{ runner.os }}-cargo-${{ hashFiles('native/Cargo.lock') }}
+            rust-target
+          key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }}
           restore-keys: ${{ runner.os }}-cargo-
 
       - name: Build native and run tests
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 4cf628f..952bf34 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -54,7 +54,7 @@ jobs:
         run: ./mvnw -q spotless:check
 
       - name: Check Rust formatting
-        run: cd native && cargo fmt --all -- --check
+        run: cargo fmt --all -- --check
 
   clippy:
     name: Clippy
@@ -81,9 +81,9 @@ jobs:
           path: |
             ~/.cargo/registry
             ~/.cargo/git
-            native/target
-          key: ${{ runner.os }}-clippy-${{ hashFiles('native/Cargo.lock') }}
+            rust-target
+          key: ${{ runner.os }}-clippy-${{ hashFiles('Cargo.lock') }}
           restore-keys: ${{ runner.os }}-clippy-
 
       - name: Run clippy
-        run: cd native && cargo clippy --all-targets -- -D warnings
+        run: cargo clippy --workspace --all-targets -- -D warnings
diff --git a/.gitignore b/.gitignore
index 719a2a4..25c9216 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 target/
+rust-target/
 *.class
 .idea/
 .vscode/
diff --git a/native/Cargo.lock b/Cargo.lock
similarity index 94%
rename from native/Cargo.lock
rename to Cargo.lock
index 96d2f9d..41d022d 100644
--- a/native/Cargo.lock
+++ b/Cargo.lock
@@ -98,9 +98,9 @@ dependencies = [
 
 [[package]]
 name = "ar_archive_writer"
-version = "0.5.1"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b"
+checksum = "4087686b4b0a3427190bae57a1d9a478dbb2d40c5dc1bd6e2b6d797913bdd348"
 dependencies = [
  "object",
 ]
@@ -119,9 +119,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "arrow"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "607e64bb911ee4f90483e044fe78f175989148c2892e659a2cd25429e782ec54"
+checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -140,9 +140,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-arith"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e754319ed8a85d817fe7adf183227e0b5308b82790a737b426c1124626b48118"
+checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -154,9 +154,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-array"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "841321891f247aa86c6112c80d83d89cb36e0addd020fa2425085b8eb6c3f579"
+checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e"
 dependencies = [
  "ahash",
  "arrow-buffer",
@@ -173,9 +173,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f955dfb73fae000425f49c8226d2044dab60fb7ad4af1e24f961756354d996c9"
+checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0"
 dependencies = [
  "bytes",
  "half",
@@ -185,9 +185,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-cast"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca5e686972523798f76bef355145bc1ae25a84c731e650268d31ab763c701663"
+checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -207,9 +207,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-csv"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86c276756867fc8186ec380c72c290e6e3b23a1d4fb05df6b1d62d2e62666d48"
+checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de"
 dependencies = [
  "arrow-array",
  "arrow-cast",
@@ -222,9 +222,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-data"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db3b5846209775b6dc8056d77ff9a032b27043383dd5488abd0b663e265b9373"
+checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -235,9 +235,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ipc"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd8907ddd8f9fbabf91ec2c85c1d81fe2874e336d2443eb36373595e28b98dd5"
+checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -251,9 +251,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-json"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4518c59acc501f10d7dcae397fe12b8db3d81bc7de94456f8a58f9165d6f502"
+checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -276,9 +276,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ord"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efa70d9d6b1356f1fb9f1f651b84a725b7e0abb93f188cf7d31f14abfa2f2e6f"
+checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -289,9 +289,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-row"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "faec88a945338192beffbbd4be0def70135422930caa244ac3cec0cd213b26b4"
+checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -302,9 +302,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-schema"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18aa020f6bc8e5201dcd2d4b7f98c68f8a410ef37128263243e6ff2a47a67d4f"
+checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed"
 dependencies = [
  "bitflags",
  "serde_core",
@@ -313,9 +313,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-select"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a657ab5132e9c8ca3b24eb15a823d0ced38017fe3930ff50167466b02e2d592c"
+checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -327,9 +327,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-string"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6de2efbbd1a9f9780ceb8d1ff5d20421b35863b361e3386b4f571f1fc69fcb8"
+checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -393,9 +393,9 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
 
 [[package]]
 name = "autocfg"
-version = "1.5.0"
+version = "1.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
 
 [[package]]
 name = "base64"
@@ -419,9 +419,9 @@ dependencies = [
 
 [[package]]
 name = "bitflags"
-version = "2.11.1"
+version = "2.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
+checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
 
 [[package]]
 name = "blake2"
@@ -457,9 +457,9 @@ dependencies = [
 
 [[package]]
 name = "bon"
-version = "3.9.1"
+version = "3.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe"
+checksum = "b2f04f6fef12d70d42a77b1433c9e0f065238479a6cefc4f5bab105e9873a3c3"
 dependencies = [
  "bon-macros",
  "rustversion",
@@ -467,9 +467,9 @@ dependencies = [
 
 [[package]]
 name = "bon-macros"
-version = "3.9.1"
+version = "3.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
+checksum = "7d0bd4c2f75335ad98052a37efb54f428b492f64340257143b3429c8a508fa7b"
 dependencies = [
  "darling",
  "ident_case",
@@ -482,9 +482,9 @@ dependencies = [
 
 [[package]]
 name = "brotli"
-version = "8.0.2"
+version = "8.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
+checksum = "8119e4516436f5708bbc474a9d395bf12f1b5395e93a92a56e647ac3388c8610"
 dependencies = [
  "alloc-no-stdlib",
  "alloc-stdlib",
@@ -493,9 +493,9 @@ dependencies = [
 
 [[package]]
 name = "brotli-decompressor"
-version = "5.0.0"
+version = "5.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
+checksum = "5962523e1b92ce1b5e793d9169b9943eece10d39f62550bc04bb605d75b94924"
 dependencies = [
  "alloc-no-stdlib",
  "alloc-stdlib",
@@ -503,9 +503,9 @@ dependencies = [
 
 [[package]]
 name = "bumpalo"
-version = "3.20.2"
+version = "3.20.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
 
 [[package]]
 name = "byteorder"
@@ -530,9 +530,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.2.62"
+version = "1.2.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98"
+checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f"
 dependencies = [
  "find-msvc-tools",
  "jobserver",
@@ -571,9 +571,9 @@ dependencies = [
 
 [[package]]
 name = "chrono"
-version = "0.4.44"
+version = "0.4.45"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
+checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327"
 dependencies = [
  "iana-time-zone",
  "num-traits",
@@ -789,9 +789,9 @@ dependencies = [
 
 [[package]]
 name = "dashmap"
-version = "6.1.0"
+version = "6.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c"
 dependencies = [
  "cfg-if",
  "crossbeam-utils",
@@ -1306,6 +1306,7 @@ dependencies = [
  "arrow",
  "async-trait",
  "datafusion",
+ "datafusion-jni-common",
  "datafusion-proto",
  "datafusion-spark",
  "datafusion-substrait",
@@ -1320,6 +1321,16 @@ dependencies = [
  "url",
 ]
 
+[[package]]
+name = "datafusion-jni-common"
+version = "0.1.0"
+dependencies = [
+ "datafusion",
+ "futures",
+ "jni",
+ "tokio",
+]
+
 [[package]]
 name = "datafusion-macros"
 version = "53.1.0"
@@ -1514,6 +1525,31 @@ dependencies = [
  "log",
 ]
 
+[[package]]
+name = "datafusion-scan-ffi"
+version = "0.1.0"
+dependencies = [
+ "arrow",
+ "datafusion",
+ "datafusion-proto",
+ "datafusion-scan-ffi",
+ "futures",
+ "prost",
+ "prost-build",
+ "protoc-bin-vendored",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-scan-jni"
+version = "0.1.0"
+dependencies = [
+ "arrow",
+ "datafusion-scan-ffi",
+ "jni",
+ "prost",
+]
+
 [[package]]
 name = "datafusion-session"
 version = "53.1.0"
@@ -1607,9 +1643,9 @@ dependencies = [
 
 [[package]]
 name = "displaydoc"
-version = "0.2.5"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1624,9 +1660,9 @@ checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
 
 [[package]]
 name = "either"
-version = "1.15.0"
+version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
 
 [[package]]
 name = "equivalent"
@@ -1932,9 +1968,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
 
 [[package]]
 name = "http"
-version = "1.4.0"
+version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
+checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425"
 dependencies = [
  "bytes",
  "itoa",
@@ -1977,9 +2013,9 @@ checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
 
 [[package]]
 name = "hyper"
-version = "1.9.0"
+version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca"
+checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498"
 dependencies = [
  "atomic-waker",
  "bytes",
@@ -2269,13 +2305,12 @@ dependencies = [
 
 [[package]]
 name = "js-sys"
-version = "0.3.98"
+version = "0.3.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08"
+checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162"
 dependencies = [
  "cfg-if",
  "futures-util",
- "once_cell",
  "wasm-bindgen",
 ]
 
@@ -2344,9 +2379,9 @@ dependencies = [
 
 [[package]]
 name = "libbz2-rs-sys"
-version = "0.2.3"
+version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f"
+checksum = "34b357333733e8260735ba5894eb928c02ecc69c78715f01a8019e7fa7f2db4c"
 
 [[package]]
 name = "libc"
@@ -2403,9 +2438,9 @@ dependencies = [
 
 [[package]]
 name = "log"
-version = "0.4.29"
+version = "0.4.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
 
 [[package]]
 name = "lru-slab"
@@ -2434,9 +2469,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.8.0"
+version = "2.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8"
 
 [[package]]
 name = "miniz_oxide"
@@ -2450,9 +2485,9 @@ dependencies = [
 
 [[package]]
 name = "mio"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
+checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda"
 dependencies = [
  "libc",
  "wasi",
@@ -2598,9 +2633,9 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "58.2.0"
+version = "58.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d7efd3052f7d6ef601085559a246bc991e9a8cc77e02753737df6322ce35f1"
+checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -2762,9 +2797,9 @@ dependencies = [
 
 [[package]]
 name = "prost"
-version = "0.14.3"
+version = "0.14.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
+checksum = "528ac67416ff8646872a3c02cad9cc4ee5dc9f9540c9b10771855c95cb2e5ae1"
 dependencies = [
  "bytes",
  "prost-derive",
@@ -2772,9 +2807,9 @@ dependencies = [
 
 [[package]]
 name = "prost-build"
-version = "0.14.3"
+version = "0.14.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
+checksum = "03da047801ff44bb6a4d407d4860c05fd70bb81714e6b2f3812603d5b145b042"
 dependencies = [
  "heck",
  "itertools",
@@ -2791,9 +2826,9 @@ dependencies = [
 
 [[package]]
 name = "prost-derive"
-version = "0.14.3"
+version = "0.14.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
+checksum = "b570b25f7617e43d59005d0990ccb79e950a423952cea19671b7a876da390adf"
 dependencies = [
  "anyhow",
  "itertools",
@@ -2804,9 +2839,9 @@ dependencies = [
 
 [[package]]
 name = "prost-types"
-version = "0.14.3"
+version = "0.14.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7"
+checksum = "f94967dc7688f3054c7fac87473ffae4cc4c3904800e2d9f5b857246d8963b0a"
 dependencies = [
  "prost",
 ]
@@ -3063,9 +3098,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.12.3"
+version = "1.12.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -3092,9 +3127,9 @@ checksum = "cab834c73d247e67f4fae452806d17d3c7501756d98c8808d7c9c7aa7d18f973"
 
 [[package]]
 name = "regex-syntax"
-version = "0.8.10"
+version = "0.8.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
 
 [[package]]
 name = "regress"
@@ -3206,9 +3241,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-native-certs"
-version = "0.8.3"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
+checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d"
 dependencies = [
  "openssl-probe",
  "rustls-pki-types",
@@ -3389,9 +3424,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.149"
+version = "1.0.150"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
 dependencies = [
  "itoa",
  "memchr",
@@ -3461,9 +3496,9 @@ dependencies = [
 
 [[package]]
 name = "shlex"
-version = "1.3.0"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
 
 [[package]]
 name = "simd-adler32"
@@ -3503,9 +3538,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
 
 [[package]]
 name = "socket2"
-version = "0.6.3"
+version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51"
 dependencies = [
  "libc",
  "windows-sys 0.61.2",
@@ -3900,9 +3935,9 @@ checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
 
 [[package]]
 name = "typenum"
-version = "1.20.0"
+version = "1.20.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
 
 [[package]]
 name = "typify"
@@ -3959,9 +3994,9 @@ checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
 
 [[package]]
 name = "unicode-segmentation"
-version = "1.13.2"
+version = "1.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
+checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8"
 
 [[package]]
 name = "unicode-width"
@@ -4007,9 +4042,9 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
 
 [[package]]
 name = "uuid"
-version = "1.23.1"
+version = "1.23.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
+checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7"
 dependencies = [
  "getrandom 0.4.2",
  "js-sys",
@@ -4068,9 +4103,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.121"
+version = "0.2.123"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790"
+checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -4081,9 +4116,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-futures"
-version = "0.4.71"
+version = "0.4.73"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8"
+checksum = "54568702fabf5d4849ce2b90fadfa64168a097eaf4b351ce9df8b687a0086aaf"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -4091,9 +4126,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.121"
+version = "0.2.123"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578"
+checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -4101,9 +4136,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.121"
+version = "0.2.123"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2"
+checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b"
 dependencies = [
  "bumpalo",
  "proc-macro2",
@@ -4114,9 +4149,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.121"
+version = "0.2.123"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441"
+checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92"
 dependencies = [
  "unicode-ident",
 ]
@@ -4170,9 +4205,9 @@ dependencies = [
 
 [[package]]
 name = "web-sys"
-version = "0.3.98"
+version = "0.3.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa"
+checksum = "6e0871acf327f283dc6da28a1696cdc64fb355ba9f935d052021fa77f35cce69"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -4580,9 +4615,9 @@ checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
 
 [[package]]
 name = "yoke"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
+checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5"
 dependencies = [
  "stable_deref_trait",
  "yoke-derive",
@@ -4603,18 +4638,18 @@ dependencies = [
 
 [[package]]
 name = "zerocopy"
-version = "0.8.48"
+version = "0.8.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
+checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.8.48"
+version = "0.8.52"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
+checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -4623,9 +4658,9 @@ dependencies = [
 
 [[package]]
 name = "zerofrom"
-version = "0.1.7"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df"
+checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
 dependencies = [
  "zerofrom-derive",
 ]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..d32ba1d
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,59 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Virtual manifest: there is no root `[package]`, hence no edition from which
+# Cargo could infer a feature resolver, so version 2 is selected explicitly.
+# The member crates share one `Cargo.lock` and one build directory, and
+# inherit the `[workspace.package]` and `[workspace.dependencies]` tables
+# defined below.
+[workspace]
+members = ["native", "native-common", "native-ffi", "native-jni"]
+resolver = "2"
+
+# Package metadata shared by the workspace. A member picks a field up only if
+# it opts in (`version.workspace = true`, `edition.workspace = true`, ...), so
+# one bump here re-versions every crate that inherits `version`.
+[workspace.package]
+version = "0.1.0"
+edition = "2021"
+license = "Apache-2.0"
+repository = "https://github.com/apache/datafusion-java"
+
+# Every crates.io dependency used by a workspace member is declared here so
+# version bumps live in one place and every member states the same requirement.
+# Members reference these via `{ workspace = true }` and may only add
+# `optional` / extra `features` at the use site, so `default-features` has to
+# be decided in this table. Workspace-internal crates are not listed here.
+[workspace.dependencies]
+arrow = { version = "58", features = ["ffi"] }
+async-trait = "0.1"
+datafusion = "53.1.0"
+datafusion-proto = "53.1.0"
+datafusion-spark = "53.1.0"
+datafusion-substrait = "53.1.0"
+futures = "0.3"
+jni = "0.21"
+# Kept on the 0.13.x line that DataFusion 53.1 pulls in transitively so Cargo
+# resolves one `object_store` and we share its `dyn ObjectStore` vtable.
+object_store = { version = "0.13", default-features = false }
+prost = "0.14"
+prost-build = "0.14"
+protoc-bin-vendored = "3"
+tokio = { version = "1", features = ["rt-multi-thread"] }
+# Optional, cfg-gated. See `native/Cargo.toml` for the build-flag dance.
+tokio-metrics = "0.5"
+url = "2"
diff --git a/Makefile b/Makefile
index 6d9b0ae..d6bcf2c 100644
--- a/Makefile
+++ b/Makefile
@@ -20,14 +20,14 @@
 all: native jvm
 
 native:
-	cd native && cargo build
+	cargo build --workspace
 
-# Build the native crate with the `runtime-metrics` Cargo feature enabled.
+# Build the JNI crate with the `runtime-metrics` Cargo feature enabled.
 # Requires `--cfg tokio_unstable` because tokio-metrics gates its API there.
 # Default `make native` does not pull this in; callers who need
 # SessionContext.runtimeStats() pick this target explicitly.
 native-runtime-metrics:
-	cd native && RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics
+	RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
 
 jvm:
 	./mvnw package -DskipTests
@@ -39,10 +39,10 @@ test: native
 # `:check` form inline in .github/workflows/lint.yml.
 format:
 	./mvnw -q spotless:apply
-	cd native && cargo fmt --all
+	cargo fmt --all
 
 clean:
-	cd native && cargo clean
+	cargo clean
 	./mvnw clean
 
 tpch-data:
diff --git a/core/pom.xml b/core/pom.xml
index 5ddf107..e589b16 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -88,7 +88,9 @@ under the License.
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-surefire-plugin</artifactId>
                 <configuration>
-                    <argLine>--add-opens=java.base/java.nio=ALL-UNNAMED</argLine>
+                    <!-- java.library.path (quoted: the checkout path may contain spaces) lets
+                         System.loadLibrary find the datafusion_scan_jni shim under rust-target/. -->
+                    <argLine>--add-opens=java.base/java.nio=ALL-UNNAMED -Djava.library.path="${maven.multiModuleProjectDirectory}/rust-target/${datafusion.native.profile}"</argLine>
                 </configuration>
             </plugin>
             <plugin>
@@ -102,8 +104,8 @@ under the License.
                         <configuration>
                             <target>
                                 <property name="datafusion.native.lib.source"
-                                          value="${maven.multiModuleProjectDirectory}/native/target/${datafusion.native.profile}/${datafusion.lib.filename}"/>
-                                <fail message="Native library not found at ${datafusion.native.lib.source}. Run 'cd native &amp;&amp; cargo build' (or 'make') before building the JAR.">
+                                          value="${maven.multiModuleProjectDirectory}/rust-target/${datafusion.native.profile}/${datafusion.lib.filename}"/>
+                                <fail message="Native library not found at ${datafusion.native.lib.source}. Run 'cargo build -p datafusion-jni' (or 'make') before building the JAR.">
                                     <condition><not><available file="${datafusion.native.lib.source}"/></not></condition>
                                 </fail>
                                 <mkdir dir="${project.build.outputDirectory}/org/apache/datafusion/${datafusion.lib.os}/${datafusion.lib.arch}"/>
diff --git a/core/src/main/java/org/apache/datafusion/SessionContext.java b/core/src/main/java/org/apache/datafusion/SessionContext.java
index ec0bd85..b68cda5 100644
--- a/core/src/main/java/org/apache/datafusion/SessionContext.java
+++ b/core/src/main/java/org/apache/datafusion/SessionContext.java
@@ -113,10 +113,11 @@ public DataFrame fromProto(byte[] planBytes) {
    * other Substrait-emitting tool — and hand them to DataFusion without round-tripping through SQL.
    *
    * <p>Substrait support is gated behind the {@code substrait} Cargo feature on the native crate
-   * and is <strong>off by default</strong>. Rebuild the native crate with {@code cargo build
-   * --features substrait} (or {@code cargo build --features substrait,protoc} for hermetic builds
-   * that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native binary
-   * built without the feature, this method throws {@link RuntimeException} pointing at the flag.
+   * and is <strong>off by default</strong>. Rebuild the native crate with {@code cargo build -p
+   * datafusion-jni --features substrait} (or {@code ... --features substrait,protoc} for hermetic
+   * builds that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native
+   * binary built without the feature, this method throws {@link RuntimeException} pointing at the
+   * flag.
    *
    * @throws IllegalArgumentException if {@code planBytes} is {@code null}.
    * @throws IllegalStateException if this context is closed.
@@ -183,7 +184,7 @@ public MemoryUsage memoryUsage() {
    * Rebuild with:
    *
    * <pre>{@code
-   * RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics
+   * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
    * }</pre>
    *
    * <p>If invoked against a native binary built without the feature, this method throws {@link
diff --git a/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java b/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java
new file mode 100644
index 0000000..6a2d43b
--- /dev/null
+++ b/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+import org.apache.arrow.c.ArrowArrayStream;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.CDataDictionaryProvider;
+import org.apache.arrow.c.Data;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * A planned scan over a DataFusion {@code TableProvider}, driven through the plain-C scan ABI.
+ *
+ * <p>This is the JVM-facing wrapper over {@link NativeScan}. Each scanned partition is returned as
+ * an {@link ArrowReader} imported from a native {@code FFI_ArrowArrayStream} through the Arrow C
+ * Stream interface, so record batches never pass through JNI -- they cross via the Arrow C Data
+ * interface that arrow-java already speaks. This mirrors {@code DataFrame#collect}.
+ *
+ * <p>The provider and its parameters are supplied as a serialized {@code ScanConfig}; pushed-down
+ * projection/filters/tuning as a serialized {@code ScanRequest}. Both are built with the generated
+ * protobuf classes in {@code org.apache.datafusion.protobuf}.
+ *
+ * <p>Not thread-safe with respect to {@link #close()}: callers must not close a scan while a
+ * partition execute is in flight on another thread.
+ */
+public final class DatafusionScan implements AutoCloseable {
+
+  private final long handle;
+  // volatile: read by ensureOpen() without holding the monitor that close() takes.
+  private volatile boolean closed;
+
+  private DatafusionScan(long handle) {
+    this.handle = handle;
+  }
+
+  /**
+   * Probe a provider's output schema without planning a scan.
+   *
+   * @param allocator allocator for the transient C schema struct
+   * @param provider registered builder name (e.g. {@code datafusion.listing})
+   * @param config serialized {@code ScanConfig}
+   */
+  public static Schema schema(BufferAllocator allocator, String provider, byte[] config) {
+    ArrowSchema cSchema = ArrowSchema.allocateNew(allocator);
+    try {
+      NativeScan.providerSchema(provider, config, cSchema.memoryAddress());
+    } catch (RuntimeException | Error e) {
+      // importField never ran, so nothing else will free the struct's buffer.
+      cSchema.close();
+      throw e;
+    }
+    // The provider may collect dictionaries during import; only the Field is
+    // needed here, so release them as soon as the schema has been built.
+    try (CDataDictionaryProvider dictionaries = new CDataDictionaryProvider()) {
+      // importField takes ownership of the C struct and returns the struct-typed
+      // root; its children are the table's columns.
+      Field root = Data.importField(allocator, cSchema, dictionaries);
+      return new Schema(root.getChildren());
+    }
+  }
+
+  /**
+   * Plan a scan over {@code provider}.
+   *
+   * @param provider registered builder name
+   * @param config serialized {@code ScanConfig}
+   * @param scanRequest serialized {@code ScanRequest}, or {@code null}/empty for no pushdown
+   */
+  public static DatafusionScan create(String provider, byte[] config, byte[] scanRequest) {
+    byte[] request = scanRequest == null ? new byte[0] : scanRequest;
+    return new DatafusionScan(NativeScan.createScan(provider, config, request));
+  }
+
+  /**
+   * Number of output partitions this scan produces.
+   *
+   * @throws IllegalStateException if this scan is closed.
+   */
+  public int partitionCount() {
+    ensureOpen();
+    return NativeScan.partitionCount(handle);
+  }
+
+  /**
+   * Execute one partition. The returned {@link ArrowReader} owns the underlying stream; close it
+   * when done. Safe to call concurrently for distinct partitions.
+   *
+   * @throws IllegalStateException if this scan is closed.
+   */
+  public ArrowReader executePartition(BufferAllocator allocator, int partition) {
+    ensureOpen();
+    ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator);
+    try {
+      NativeScan.executeStreamPartition(handle, partition, stream.memoryAddress());
+    } catch (RuntimeException | Error e) {
+      // No reader took ownership of the struct, so free its buffer before propagating.
+      stream.close();
+      throw e;
+    }
+    return Data.importArrayStream(allocator, stream);
+  }
+
+  /**
+   * Execute the whole plan as a single coalesced reader.
+   *
+   * @throws IllegalStateException if this scan is closed.
+   */
+  public ArrowReader execute(BufferAllocator allocator) {
+    ensureOpen();
+    ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator);
+    try {
+      NativeScan.executeStream(handle, stream.memoryAddress());
+    } catch (RuntimeException | Error e) {
+      // No reader took ownership of the struct, so free its buffer before propagating.
+      stream.close();
+      throw e;
+    }
+    return Data.importArrayStream(allocator, stream);
+  }
+
+  /** Release the native scan. Idempotent: only the first call reaches the native side. */
+  @Override
+  public synchronized void close() {
+    if (closed) {
+      return;
+    }
+    closed = true;
+    NativeScan.closeScan(handle);
+  }
+
+  /** Fail fast instead of handing an already-released handle to native code. */
+  private void ensureOpen() {
+    if (closed) {
+      throw new IllegalStateException("DatafusionScan is closed");
+    }
+  }
+}
diff --git a/core/src/main/java/org/apache/datafusion/scan/NativeScan.java b/core/src/main/java/org/apache/datafusion/scan/NativeScan.java
new file mode 100644
index 0000000..31093d4
--- /dev/null
+++ b/core/src/main/java/org/apache/datafusion/scan/NativeScan.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+/**
+ * Raw native bindings to the {@code datafusion_scan_jni} shim.
+ *
+ * <p>Every method is a thin pass-through to the in-process scan core. Arrow data is never marshaled
+ * across this boundary: the {@code *Addr} arguments are the memory addresses of {@code
+ * org.apache.arrow.c.ArrowSchema} / {@code ArrowArrayStream} structs allocated by arrow-java, which
+ * the native side fills in place. Callers should use {@link DatafusionScan} rather than these
+ * directly.
+ */
+final class NativeScan {
+  // Class initialization loads the shim, so it is in place before any native method runs.
+  static {
+    ScanNativeLoader.load();
+  }
+
+  private NativeScan() {}
+
+  /** Probe a provider's output schema into the {@code ArrowSchema} at {@code schemaAddr}. */
+  static native void providerSchema(String provider, byte[] config, long schemaAddr);
+
+  /**
+   * Plan a scan. Returns an opaque handle; release it with {@link #closeScan(long)}.
+   *
+   * @param provider registered builder name (e.g. {@code datafusion.listing})
+   * @param config serialized {@code ScanConfig}
+   * @param scanRequest serialized {@code ScanRequest} (pushdown), or empty for none
+   */
+  static native long createScan(String provider, byte[] config, byte[] scanRequest);
+
+  /** Output partition count of the planned scan behind {@code handle}. */
+  static native int partitionCount(long handle);
+
+  /** Execute one partition into the {@code ArrowArrayStream} at {@code streamAddr}. */
+  static native void executeStreamPartition(long handle, int partition, long streamAddr);
+
+  /** Execute the whole plan as one coalesced stream into the struct at {@code streamAddr}. */
+  static native void executeStream(long handle, long streamAddr);
+
+  /** Drop the planned scan behind {@code handle}. Null-safe. */
+  static native void closeScan(long handle);
+}
diff --git a/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java b/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java
new file mode 100644
index 0000000..6540ce4
--- /dev/null
+++ b/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+/**
+ * Loads the {@code datafusion_scan_jni} shim library.
+ *
+ * <p>This is the JVM adapter over the plain-C scan ABI exported by {@code
+ * datafusion-scan-ffi}. The library is loaded from {@code java.library.path} (set it with {@code
+ * -Djava.library.path=...} or the platform library-path environment variable so it can find the
+ * built {@code libdatafusion_scan_jni}). Classpath bundling, as the core {@code datafusion_jni}
+ * library does, is left to release packaging.
+ */
+final class ScanNativeLoader {
+
+  private static final String LIBRARY_NAME = "datafusion_scan_jni";
+
+  private static volatile boolean loaded;
+
+  private ScanNativeLoader() {}
+
+  static synchronized void load() {
+    if (!loaded) {
+      // The flag is set only after loadLibrary returns, so a failed load is not recorded as done.
+      System.loadLibrary(LIBRARY_NAME);
+      loaded = true;
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
index 120d179..d567275 100644
--- a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
+++ b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
@@ -37,7 +37,7 @@
  * #checkFeatureEnabled}. Run
  *
  * <pre>{@code
- * (cd native && RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics)
+ * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
  * }</pre>
  *
  * before {@code ./mvnw test} to exercise this class.
diff --git a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
index 34db3b5..a2cfb0a 100644
--- a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
+++ b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
@@ -50,7 +50,7 @@
  *
  * <p>The {@code substrait} Cargo feature is off by default in {@code native/Cargo.toml}; if the
  * native crate was built without it, every test here is skipped (see {@link #checkFeatureEnabled}).
- * Run {@code (cd native && cargo build --features substrait)} before {@code ./mvnw test} to
+ * Run {@code cargo build -p datafusion-jni --features substrait} before {@code ./mvnw test} to
  * exercise this class.
  */
 class SessionContextSubstraitTest {
diff --git a/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java b/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java
new file mode 100644
index 0000000..2cf61f7
--- /dev/null
+++ b/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+import static java.util.stream.Collectors.toList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import com.google.protobuf.ByteString;
+import datafusion_common.DatafusionCommon.Column;
+import datafusion_common.DatafusionCommon.ScalarValue;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.datafusion.protobuf.BinaryExprNode;
+import org.apache.datafusion.protobuf.CsvReadOptionsProto;
+import org.apache.datafusion.protobuf.ListingSource;
+import org.apache.datafusion.protobuf.LogicalExprNode;
+import org.apache.datafusion.protobuf.ScanConfig;
+import org.apache.datafusion.protobuf.ScanRequest;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * End-to-end exercise of the JNI shim: drive the {@code datafusion.listing} provider over a CSV
+ * entirely from Java, confirming the Arrow C Stream produced by arrow-rs imports cleanly through
+ * arrow-java's {@code Data.importArrayStream}. This is the proof that the C Stream ABI matches
+ * across the two Arrow implementations through this path.
+ */
+class DatafusionScanTest {
+
+  private static final String PROVIDER = "datafusion.listing";
+
+  /** Three-row fixture shared by every test: ids 1..3 with single-letter names. */
+  private static final String CSV_ROWS = "id,name\n1,a\n2,b\n3,c\n";
+
+  @TempDir Path tmp;
+
+  /** Build a ScanConfig for a CSV listing source, using the generated protobuf builders. */
+  private byte[] csvConfig(String path) {
+    return ScanConfig.newBuilder()
+        .setProvider(PROVIDER)
+        .setListing(
+            ListingSource.newBuilder()
+                .addPaths(path)
+                .setCsv(
+                    CsvReadOptionsProto.newBuilder()
+                        .setHasHeader(true)
+                        .setDelimiter(',')
+                        .setQuote('"')
+                        .setFileExtension(".csv")
+                        .build())
+                .build())
+        .build()
+        .toByteArray();
+  }
+
+  @Test
+  void inferredSchemaMatchesCsvHeader() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+
+    try (BufferAllocator allocator = new RootAllocator()) {
+      Schema schema = DatafusionScan.schema(allocator, PROVIDER, config);
+      List<String> names = schema.getFields().stream().map(Field::getName).collect(toList());
+      assertEquals(List.of("id", "name"), names);
+    }
+  }
+
+  @Test
+  void scansCsvRowsThroughArrowCStream() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, null)) {
+      assertTrue(scan.partitionCount() >= 1, "expected at least one partition");
+
+      long total = 0;
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+          BigIntVector ids = (BigIntVector) root.getVector("id");
+          for (int i = 0; i < root.getRowCount(); i++) {
+            total += ids.get(i);
+          }
+        }
+      }
+      assertEquals(3, rows);
+      assertEquals(1 + 2 + 3, total);
+    }
+  }
+
+  @Test
+  void projectionPrunesColumns() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+    // Pushed projection: keep only "name".
+    byte[] request = ScanRequest.newBuilder().addProjection("name").build().toByteArray();
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) {
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        List<String> cols =
+            root.getSchema().getFields().stream().map(Field::getName).collect(toList());
+        assertEquals(List.of("name"), cols, "projection should drop the id column");
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+        }
+      }
+      assertEquals(3, rows);
+    }
+  }
+
+  @Test
+  void filterPushdownSelectsRows() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+    // Pushed filter: id >= 2.
+    byte[] request =
+        ScanRequest.newBuilder().addFilters(ByteString.copyFrom(idAtLeast(2))).build().toByteArray();
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) {
+      long total = 0;
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+          BigIntVector ids = (BigIntVector) root.getVector("id");
+          for (int i = 0; i < root.getRowCount(); i++) {
+            total += ids.get(i);
+          }
+        }
+      }
+      assertEquals(2, rows, "only id 2 and 3 pass the filter");
+      assertEquals(2 + 3, total);
+    }
+  }
+
+  @Test
+  void limitCapsRows() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+    // Pushed limit of 2 over the 3-row CSV.
+    byte[] request = ScanRequest.newBuilder().setLimit(2).build().toByteArray();
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) {
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+        }
+      }
+      assertEquals(2, rows, "limit should cap the scan at 2 rows");
+    }
+  }
+
+  /** Serialize the LogicalExprNode for {@code id >= value}, as the engine's filter pushdown would. */
+  private static byte[] idAtLeast(long value) {
+    LogicalExprNode column =
+        LogicalExprNode.newBuilder().setColumn(Column.newBuilder().setName("id")).build();
+    LogicalExprNode literal =
+        LogicalExprNode.newBuilder()
+            .setLiteral(ScalarValue.newBuilder().setInt64Value(value))
+            .build();
+    return LogicalExprNode.newBuilder()
+        .setBinaryExpr(
+            BinaryExprNode.newBuilder().addOperands(column).addOperands(literal).setOp("GtEq"))
+        .build()
+        .toByteArray();
+  }
+
+  /** Write the shared fixture to {@code data.csv} in the temp dir and return its path. */
+  private String writeCsv() throws Exception {
+    Path csv = tmp.resolve("data.csv");
+    Files.writeString(csv, CSV_ROWS);
+    return csv.toString();
+  }
+}
diff --git a/dev/release/build-release.sh b/dev/release/build-release.sh
index 2b033bb..4d4ab13 100755
--- a/dev/release/build-release.sh
+++ b/dev/release/build-release.sh
@@ -135,26 +135,28 @@ JVM_TARGET_DIR="$PROJECT_HOME/core/target/classes/org/apache/datafusion"
 
 mkdir -p "$JVM_TARGET_DIR/linux/amd64"
 docker cp \
-    "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+    "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \
     "$JVM_TARGET_DIR/linux/amd64/"
 
 mkdir -p "$JVM_TARGET_DIR/linux/aarch64"
 docker cp \
-    "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+    "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \
     "$JVM_TARGET_DIR/linux/aarch64/"
 
 echo "Building macOS native libs on the host (host=$HOST_ARCH)"
 rustup target add "$OTHER_DARWIN_TARGET"
 
-(cd "$PROJECT_HOME/native" && cargo build --release)
-(cd "$PROJECT_HOME/native" && cargo build --release --target "$OTHER_DARWIN_TARGET")
+# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml),
+# not the per-crate `native/target/`, so build from the repo root.
+(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni)
+(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni --target "$OTHER_DARWIN_TARGET")
 
 mkdir -p "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR"
-cp "$PROJECT_HOME/native/target/release/libdatafusion_jni.dylib" \
+cp "$PROJECT_HOME/rust-target/release/libdatafusion_jni.dylib" \
    "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR/"
 
 mkdir -p "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR"
-cp "$PROJECT_HOME/native/target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \
+cp "$PROJECT_HOME/rust-target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \
    "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR/"
 
 echo "Installing JAR into local Maven repo"
diff --git a/dev/release/datafusion-java-rm/build-native-libs.sh b/dev/release/datafusion-java-rm/build-native-libs.sh
index 5f273cc..79f8ae0 100755
--- a/dev/release/datafusion-java-rm/build-native-libs.sh
+++ b/dev/release/datafusion-java-rm/build-native-libs.sh
@@ -38,8 +38,9 @@ git clone "$REPO" datafusion-java
 cd datafusion-java
 git checkout "$BRANCH"
 
-cd native
-cargo build --release
+# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml),
+# not the per-crate `native/target/`, so build from the repo root.
+cargo build --release -p datafusion-jni
 
-echo "Built $(pwd)/target/release/libdatafusion_jni.so"
-ls -l target/release/libdatafusion_jni.so
+echo "Built $(pwd)/rust-target/release/libdatafusion_jni.so"
+ls -l rust-target/release/libdatafusion_jni.so
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 81d83e8..3dbd90f 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -7,7 +7,7 @@
 .mvn/wrapper/maven-wrapper.properties
 mvnw
 mvnw.cmd
-native/Cargo.lock
+Cargo.lock
 dev/release/rat_exclude_files.txt
 docs/source/_static/**
 docs/source/conf.py
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index e486adc..c7767bf 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -150,7 +150,8 @@ test_source_distribution() {
 
   # raises on any formatting errors
   rustup component add rustfmt
-  (cd native && cargo fmt --all -- --check)
+  # Workspace-wide: covers native, native-common, and any future members.
+  cargo fmt --all -- --check
 
   # build native + JVM and run the full test suite
   make test
diff --git a/docs/datafusion-spark-design.md b/docs/datafusion-spark-design.md
new file mode 100644
index 0000000..1019f3b
--- /dev/null
+++ b/docs/datafusion-spark-design.md
@@ -0,0 +1,304 @@
+# DataFusion-backed Spark DataSource: design
+
+## Goal
+
+Let Spark read from a DataFusion `TableProvider` as a native `DataSourceV2`,
+with the native boundary placed at the **Arrow C Data / C Stream interface and
+plain C types** — not at handwritten JNI per operation.
+
+## Origin
+
+On [PR #104](https://github.com/apache/datafusion-java/pull/104), Dewey
+Dunnington (@paleolimbot) reviewed an earlier stack (PR #103) whose cdylib
+exported JNI entry points directly, and argued for a cleaner shape:
+
+> build a cdylib that exports entrypoints that just use the Arrow C Data/Stream
+> interface and C types. That also has broader applicability to non-Java (i.e.,
+> can live in datafusion proper and get eyes/reviews from a wider audience).
+
+This design follows that: the reusable artifact is a **plain-C scan ABI** over
+Arrow C types; JNI is a thin, separable adapter; the same ABI is callable from
+Python/Go/Rust/FFM. This is "Approach A": the providers we ship are compiled into
+the cdylib and selected by name, rather than imported over `datafusion-ffi`.
+
+## Principle: two planes, both zero-copy
+
+| Plane | Carries | Crosses via |
+| --- | --- | --- |
+| **Data** | Arrow record batches | Arrow C Stream (`FFI_ArrowArrayStream`) → arrow-java import → Spark `ArrowColumnVector` |
+| **Control** | provider name, config, pushdown, partition index | plain-C calls passing `(ptr, len)` and `long` addresses |
+
+No Arrow data is ever marshaled through JNI. Batches flow through the Arrow C
+Data interface, which arrow-java and arrow-rs already speak; the JVM gets real
+Arrow vectors and hands them to Spark with no per-cell copy.
+
+## Architecture
+
+```
+ spark.read.format("datafusion").option("path", ...).load()
+   │
+   ▼  datafusion-spark (Maven module, Java, Spark 4.0)
+   │  TableProvider → Table → ScanBuilder (projection / filter / limit pushdown)
+   │  → Scan/Batch → InputPartition[]  (serializable: config + request bytes + index)
+   │  → PartitionReaderFactory → ColumnarPartitionReader
+   │
+   ▼  core: org.apache.datafusion.scan.DatafusionScan   (JVM scan API)
+   │  + NativeScan (6 JNI methods)  ──loads──►  libdatafusion_scan_jni
+   │
+   ▼  native-jni: datafusion-scan-jni (cdylib)         ← thin JVM adapter
+   │  Java_…_NativeScan_*  → calls the scan core; writes FFI_ArrowArrayStream
+   │                          into the address arrow-java allocated
+   │
+   ▼  native-ffi: datafusion-scan-ffi (cdylib + rlib)  ← the reusable plain-C ABI
+   │  df_scan_* (extern "C")  → scan core → registered provider builder
+   │  data plane: FFI_ArrowArrayStream  (arrow-rs)
+   │
+   ▼  DataFusion: TableProvider (e.g. ListingTable) reads the source
+```
+
+Non-Java consumers (Python/Go/Rust/FFM) bind `df_scan_*` directly and skip the
+JNI and Spark layers entirely.
+
+## Components
+
+| Path | Crate / module | Role |
+| --- | --- | --- |
+| `native-ffi/` | `datafusion-scan-ffi` (cdylib + rlib) | The plain-C scan ABI; scan core; provider registry; demo + `datafusion.listing` providers |
+| `native-jni/` | `datafusion-scan-jni` (cdylib) | Thin JNI shim over the scan core |
+| `core/.../scan/` | part of `datafusion-java` | `NativeScan` (native decls), `ScanNativeLoader`, `DatafusionScan` (JVM API) |
+| `spark/` | `datafusion-spark` (Java) | The Spark `DataSourceV2` connector |
+| `proto/` | shared | `scan_config.proto`, `scan_request.proto` |
+
+## The plain-C ABI (`native-ffi/include/datafusion_scan.h`)
+
+```c
+uint64_t df_scan_abi_version(void);
+void     df_error_free(char* err);
+
+int32_t df_scan_schema(DfStr provider, DfBytes options, DfBytes partition,
+                       struct ArrowSchema* out_schema, char** out_err);
+int32_t df_scan_create(DfStr provider, DfBytes options, DfBytes partition,
+                       int32_t target_partitions, int32_t batch_size, int64_t limit,
+                       const DfKeyValue* config_overrides, size_t config_overrides_len,
+                       const DfStr* projection, size_t projection_len,
+                       const DfBytes* filters, size_t filters_len,
+                       DfScanHandle** out_handle, char** out_err);
+int32_t df_scan_partition_count(const DfScanHandle*, int32_t* out_count, char** out_err);
+int32_t df_scan_execute_partition(const DfScanHandle*, int32_t partition,
+                                  struct ArrowArrayStream* out_stream, char** out_err);
+int32_t df_scan_execute(const DfScanHandle*, struct ArrowArrayStream* out_stream, char** out_err);
+void    df_scan_close(DfScanHandle*);
+```
+
+Conventions: every fallible call returns `0` / nonzero `DfStatus`, writing a
+malloc'd message to `*out_err` (freed by `df_error_free`). The only "rich" types
+crossing are the standard Arrow C structs `ArrowSchema` / `ArrowArrayStream`.
+Each call is wrapped in `catch_unwind` so a Rust panic becomes a status code,
+never an unwind across the C boundary.
+
+Providers are registered by name (`register_provider`) and selected via the
+`provider` argument; the `options`/`partition` blobs are opaque to the ABI and
+decoded by the registered builder.
+
+## Wire formats (`proto/`)
+
+- **`ScanConfig`** — the `options` blob: `provider` name + a `source` oneof
+  (`ListingSource` reusing the per-format read-option messages, or a `custom`
+  bytes escape hatch). `ScanPartition` is the per-partition `partition` blob.
+- **`ScanRequest`** — the engine's pushdown: `projection` (column names),
+  `filters` (each a serialized `datafusion.LogicalExprNode`), `limit`,
+  `target_partitions`, `batch_size`, `config_overrides`.
+
+`ScanRequest` is decoded by the JNI shim and exploded into `df_scan_create`'s
+typed C arguments, rather than passed as one blob — keeping the C ABI typed and
+FFM-friendly. Filters reuse DataFusion's own `LogicalExprNode` proto, so the
+Java side generates builders and the Rust side decodes with the stock codec from
+the same `.proto` — and the encoding is shared with any future Comet path.
+
+## JNI shim (`native-jni` + `core/.../scan`)
+
+Six `Java_…NativeScan_*` methods: `providerSchema`, `createScan`,
+`partitionCount`, `executeStreamPartition`, `executeStream`, `closeScan`. Each
+marshals a `String` + `byte[]`s and `long` addresses; the data plane writes an
+`FFI_ArrowArrayStream` into the arrow-java-allocated struct. `DatafusionScan`
+wraps these and returns an `ArrowReader` via `Data.importArrayStream`, mirroring
+`core`'s existing `DataFrame#collect`.
+
+## Arrow version strategy (the key integration decision)
+
+`ArrowColumnVector` is zero-copy only if the vectors we hand it are the **same
+arrow-java classes** Spark loaded — i.e. one Arrow in the executor JVM. So the
+connector treats arrow-java as **`provided`**: the cluster supplies it, our
+stream import and Spark's `ArrowColumnVector` share it, and columnar works with
+whatever Arrow the deployment ships (within an API-compatible window of the
+compile baseline, currently Spark 4.0's Arrow 18.1).
+
+Consequences:
+
+- **`datafusion-java` (core) stays on Arrow 19** for standalone use; only its
+  transitive Arrow dependency is excluded from the Spark module. No main downgrade.
+- **The Rust side is unaffected.** The Arrow C Data interface is a stable spec,
+  independent of Arrow library version: `arrow-rs 58` producing an
+  `FFI_ArrowArrayStream` imports into arrow-java 18 or 19 alike. Verified by the
+  JVM round-trip test.
+
+## Spark DataSourceV2 mapping
+
+| Spark interface | Our class | Behaviour |
+| --- | --- | --- |
+| `TableProvider`, `DataSourceRegister` | `DatafusionTableProvider` | `"datafusion"` short name; `inferSchema` probes via `df_scan_schema` |
+| `Table`, `SupportsRead` | `DatafusionTable` | `BATCH_READ` capability |
+| `ScanBuilder` + `SupportsPushDown{RequiredColumns,Filters,Limit}` | `DatafusionScanBuilder` | encodes projection / filters / limit into `ScanRequest` |
+| `Scan`, `Batch` | `DatafusionScanImpl` | plans once on the driver for partition count |
+| `InputPartition` | `DatafusionInputPartition` | **serializable**: carries config + request bytes + index, never a native handle |
+| `PartitionReaderFactory` | `DatafusionPartitionReaderFactory` | columnar reads |
+| `PartitionReader<ColumnarBatch>` | `DatafusionColumnarPartitionReader` | wraps imported Arrow vectors in `ArrowColumnVector`, zero-copy |
+
+Helpers: `OptionsCodec` (Spark options → `ScanConfig`), `SchemaConverter` (Arrow
+schema → Spark `StructType`, using only our Arrow types), `SparkFilters` (Spark
+`Filter`s → `LogicalExprNode`: comparisons, `And`/`Or`/`Not`, `IsNull`/
+`IsNotNull` over primitive literals; anything else falls back to Spark).
+
+**Partition serialization constraint:** a native handle is meaningless in
+another executor process, so partitions carry only bytes + an index, and each
+executor rebuilds the provider and runs its own partition. A limited plan
+coalesces to one partition, so `pushLimit` can report the bound as fully
+handled.
+
+## Testing
+
+| Level | Where | Proves |
+| --- | --- | --- |
+| Rust ABI round-trip | `native-ffi/tests/roundtrip.rs` | `df_scan_*` + import the stream back via the Arrow C Stream interface; partition count; limit; error/status |
+| Rust proto | `native-ffi/tests/proto.rs` | `ScanConfig`/`ScanRequest` encode/decode incl. embedded read-options |
+| Rust listing | `native-ffi/tests/listing.rs` | real `ListingTable` over a CSV, schema inference, full scan |
+| JVM scan | `core/.../scan/DatafusionScanTest` | end-to-end Java → JNI → Arrow C Stream; schema, scan, projection, filter, limit (closes the arrow-rs 58 ↔ arrow-java 19 ABI question) |
+| Spark unit | `spark/.../DatafusionScanBuilderTest` | decodes the built `ScanRequest` to prove pushdown is actually encoded (isolated from Spark's own handling) |
+| Spark E2E | `spark/.../DatafusionSourceTest` | local `SparkSession` over `format("datafusion")`: schema, full scan, projection, filter, limit on Spark 4.0 columnar |
+
+## Decisions log
+
+- **Approach A over `datafusion-ffi` import.** `datafusion-ffi` already exposes
+  the whole `TableProvider`, but over stabby vtables + an async, poll-based
+  `FFI_RecordBatchStream` — not Java-consumable and not flat C. Compiling
+  providers in and exporting flat C is simpler and is exactly the shape Dewey
+  asked for. The async surface would only be needed to load *third-party*
+  provider cdylibs (a future option B).
+- **Plain C + thin JNI, not JNI-in-the-cdylib.** Keeps the reusable artifact
+  language-neutral and upstreamable; quarantines the JVM into a ~6-method shim.
+- **Row-based → columnar.** Shipped row-based first to decouple from Spark's
+  Arrow, then moved to columnar once the `provided`-Arrow strategy removed the
+  version clash. Columnar is zero-copy; row-based is gone.
+- **Spark 4.0 / Arrow 18.1 baseline, Java.** Java matches the rest of the stack;
+  Spark 4.0's Arrow (18.1) is close to ours and Java-17 native.
+
+## Status and gaps
+
+Built and green end to end: the plain-C ABI, the JNI shim, and a columnar Spark
+4.0 connector with projection / filter / limit pushdown.
+
+Not yet done:
+
+- **Multi-partition coverage.** The executor-rebuild path is wired but exercised
+  only at one partition (single CSV); a directory/Parquet test would cover N>1.
+- **Native library packaging.** The shim loads from `java.library.path`;
+  classpath bundling per OS/arch (as `core` does for `datafusion_jni`) is left
+  to release packaging.
+- **Format breadth.** CSV options are fully mapped; Parquet/Avro/Arrow use
+  defaults.
+- **External provider cdylibs (option B).** Loading third-party providers over
+  `datafusion-ffi`'s `ForeignTableProvider` is not implemented.
+
+## Alternative / companion front-end: ADBC
+
+A reviewer suggested exposing arbitrary DataFusion `TableProvider`s over
+[ADBC](https://arrow.apache.org/adbc/) (Arrow Database Connectivity) instead of —
+or alongside — this scan ABI. The two are not mutually exclusive: they are two
+front-ends over the same core, serving different consumers.
+
+### What this PR's work reuses
+
+The PR already cleaves at the right seam. Of the four layers below, the two most
+valuable are front-end-agnostic:
+
+| Layer | ADBC reuse |
+| --- | --- |
+| Exec core (`scan.rs`, `reader.rs`, `runtime.rs`) — build provider → register on `SessionContext` → plan → `ExecutionPlan` → partition stream → `FFI_ArrowArrayStream` | **Direct reuse.** Already JVM-free and C-free. |
+| Provider registry (`registry.rs`) — register `TableProvider` by name, build on demand | **Direct reuse.** This *is* the "arbitrary providers" mechanism. |
+| `native-common` (errors, tokio handle); panic→status `catch_unwind` pattern | Reuse concept; ADBC has its own error struct. |
+| `df_scan_*` flat C ABI, proto pushdown (`ScanRequest` / `SparkFilters` / `LogicalExprNode`), JNI shim, `core/scan/*`, `spark/*` | **Not reused.** Scan-, JVM-, and Spark-specific. |
+
+`reader.rs`'s `StreamingReader` (DataFusion `SendableRecordBatchStream` →
+`ArrowArrayStream`) is exactly what ADBC's `AdbcStatementExecuteQuery` returns:
+the data plane is identical, and the cross-implementation Arrow C Stream question
+this PR already answered carries over unchanged.
+
+### What ADBC adds, and what it drops
+
+ADBC mandates a fixed, large C surface — `AdbcDatabase` / `AdbcConnection` /
+`AdbcStatement` lifecycle, option getters/setters, metadata calls, an
+`AdbcDriverInit` entry point. You do **not** hand-write that vtable: the official
+`adbc_core` Rust crate supplies `Database` / `Connection` / `Statement` traits
+plus an `export_driver!` macro that generates the C ABI. So the FFI layer becomes
+trait glue, not a second hand-written boundary.
+
+New work:
+
+- `adbc_core` dependency + three trait impls. `Database` holds config + registered
+  providers; `Connection` wraps a `SessionContext`; `Statement` holds SQL + bound
+  params and, on execute, runs `ctx.sql(q)` → physical plan → the existing
+  `StreamingReader`.
+- Catalog metadata methods (`GetObjects` / `GetTableSchema` / `GetTableTypes` /
+  `GetInfo`) → DataFusion `CatalogProvider` / `SchemaProvider` introspection.
+- ADBC error / status mapping in place of `DfStatus`.
+- Optional: parameter binding / prepared statements; `ExecutePartitions` (maps
+  cleanly onto the existing plan-partition logic); ingest/write (likely out of
+  scope).
+- Driver packaging (a manifest so `adbc_driver_manager` can load the library).
+
+Dropped relative to the Spark path: the protobuf pushdown machinery
+(`ScanRequest`, `SparkFilters`, `LogicalExprNode` encoding) is unneeded — ADBC
+clients send SQL and DataFusion's optimizer does pushdown internally — as are the
+JNI shim, `core/scan`, and the Spark module.
+
+### Suggested layout for both
+
+```
+native-common/        errors, tokio runtime           [shared]
+native-exec-core/     provider registry + plan/exec   [shared]  ← lift scan.rs/reader.rs/registry.rs here
+  ├─ native-ffi/      df_scan_* flat C (+ JNI/Spark)   [exists]
+  └─ native-adbc/     adbc_core trait impls            [new]
+```
+
+One refactor on the existing side: lift `scan.rs` / `reader.rs` / `registry.rs`
+out of `native-ffi` into a shared `native-exec-core` crate that both front-ends
+depend on; `native-ffi` keeps only `abi.rs` + proto. Low churn — those modules
+are already free of C/JVM concerns by design.
+
+### Why keep both rather than collapse to one
+
+Different consumers. `df_scan_*` is a bespoke, scan-only ABI with **explicit**
+pushdown: every consumer hand-binds it, but it can carry Spark's pre-resolved
+predicates without a SQL round-trip. ADBC is a SQL-oriented **standard** ABI:
+bigger mandated surface, but the whole client ecosystem (Python
+`adbc_driver_manager`, R, Go, the JDBC↔ADBC bridge) comes for free.
+
+They are not redundant, because Spark's pre-resolved pushdown does not always
+re-serialize to a SQL string:
+
+- **Lossy but rescuable** (within current filter scope): float/double literals
+  (decimal-text render loses exact IEEE bits), `NaN`/`±Inf` (no SQL literal),
+  decimal precision/scale, binary/non-UTF8 literals, null-safe equality
+  (`<=>` → `IS NOT DISTINCT FROM`), identifier quoting/case. ADBC parameter
+  binding (`WHERE col = ?` with a typed bound value) closes most of the literal
+  cases.
+- **Structurally impossible**: pushdown whose value is not known at
+  statement-prepare time — dynamic partition pruning, runtime/bloom filters from
+  joins — cannot be a static SQL string, and binding does not help because the
+  value arrives mid-execution. This PR pushes none of these yet, but it is the
+  reason a typed-`Expr` scan ABI is not merely a convenience over SQL: it is the
+  only path that can carry runtime filters at all.
+
+So the recommendation is a shared `native-exec-core` with two thin front-ends:
+ADBC for SQL clients across the Arrow ecosystem, the flat-C scan ABI for
+embedders (Spark today) that push pre-resolved or runtime predicates.
diff --git a/docs/source/contributor-guide/development.md b/docs/source/contributor-guide/development.md
index 984d77c..61d4fb0 100644
--- a/docs/source/contributor-guide/development.md
+++ b/docs/source/contributor-guide/development.md
@@ -42,7 +42,7 @@ This builds the native Rust crate and runs the JUnit tests. The steps can
 be run individually:
 
 ```sh
-cd native && cargo build
+cargo build --workspace
 ./mvnw test
 ```
 
@@ -74,6 +74,11 @@ disk space.
 
 The repository is a multi-module Maven build:
 
+- `Cargo.toml` — Rust workspace root declaring the crate members
+  (`native`, `native-common`) and `[workspace.dependencies]` that pin
+  shared versions in one place. Cargo writes artifacts to `rust-target/`
+  (overridden in `.cargo/config.toml`) so `mvn clean` at the repo root does
+  not nuke the Rust build cache.
 - `pom.xml` — parent POM declaring the `core` and `examples` modules and
   shared plugin/dependency versions.
 - `core/` — `datafusion-java` library module (Java sources, tests, and
@@ -81,7 +86,10 @@ The repository is a multi-module Maven build:
 - `examples/` — `datafusion-java-examples` module containing runnable
   examples that depend on the library; built alongside the library so they
   cannot fall out of sync with the API.
-- `native/` — Rust crate (JNI + Arrow C Data Interface).
+- `native/` — `datafusion-jni` Rust crate (JNI + Arrow C Data Interface).
+- `native-common/` — `datafusion-jni-common` Rust crate: JNI plumbing
+  shared across native crates (error→exception mapping, the per-cdylib
+  Tokio runtime singleton, the async-stream→`FFI_ArrowArrayStream` bridge).
 - `proto/` — Protobuf definitions shared between Java and Rust.
 - `Makefile` — top-level build orchestration (`make test`, `make format`,
   `make tpch-data`).
diff --git a/docs/source/contributor-guide/updating-datafusion-version.md b/docs/source/contributor-guide/updating-datafusion-version.md
index 56d50dc..6e3b90b 100644
--- a/docs/source/contributor-guide/updating-datafusion-version.md
+++ b/docs/source/contributor-guide/updating-datafusion-version.md
@@ -21,7 +21,9 @@ under the License.
 
 Three things must move together when bumping DataFusion:
 
-1. `native/Cargo.toml` — the `datafusion` crate dependency.
+1. `Cargo.toml` (workspace root) — the `datafusion`, `datafusion-proto`,
+   `datafusion-spark`, and `datafusion-substrait` entries in
+   `[workspace.dependencies]`. Members inherit from there.
 2. `pom.xml` — the `<datafusion.version>` Maven property. **Must equal
    the Cargo version**; a mismatch means JVM-built protobuf plans won't
    deserialize on the native side.
@@ -32,9 +34,9 @@ Three things must move together when bumping DataFusion:
 ## Recipe
 
 ```sh
-# 1. Bump the Cargo dep
-$EDITOR native/Cargo.toml             # set datafusion = "<new>"
-(cd native && cargo update -p datafusion)
+# 1. Bump the workspace dep
+$EDITOR Cargo.toml                    # set datafusion = "<new>" in [workspace.dependencies]
+cargo update -p datafusion
 
 # 2. Bump the Maven property to match
 $EDITOR pom.xml                       # set <datafusion.version>
diff --git a/native-common/Cargo.toml b/native-common/Cargo.toml
new file mode 100644
index 0000000..21a2296
--- /dev/null
+++ b/native-common/Cargo.toml
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-jni-common"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# Implementation detail of datafusion-java's native crates, not a standalone
+# crates.io library. Matches `publish = false` on the `datafusion-jni` crate.
+publish = false
+readme = "README.md"
+description = "Shared JNI plumbing for DataFusion Java native crates: error-to-exception mapping, the per-cdylib Tokio runtime singleton, and the async-stream-to-FFI_ArrowArrayStream bridge."
+
+[features]
+# `datafusion-jni` builds DataFusion with `avro`, which adds the
+# `DataFusionError::AvroError` variant our classifier maps to IoException.
+# Feature-forwarded so consumers that don't read Avro (the Spark helper)
+# don't pull the apache-avro stack into their cdylib.
+avro = ["datafusion/avro"]
+
+[dependencies]
+datafusion = { workspace = true }
+futures = { workspace = true }
+jni = { workspace = true }
+tokio = { workspace = true }
diff --git a/native-common/README.md b/native-common/README.md
new file mode 100644
index 0000000..aadf877
--- /dev/null
+++ b/native-common/README.md
@@ -0,0 +1,37 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# datafusion-jni-common
+
+Shared JNI plumbing for the [Apache DataFusion Java](https://github.com/apache/datafusion-java)
+native crates. It holds the pieces every DataFusion-backed `cdylib` loaded into a
+JVM needs, factored out so they live in one place.
+
+## Linking model
+
+Each consuming `cdylib` statically links its own copy of this crate, so the
+runtime singleton is per-library, not per-process. Nothing here is exported with
+`#[no_mangle]`, so linking it into several `cdylib`s loaded in one JVM cannot
+collide.
+
+## Status
+
+This crate is an implementation detail of Apache DataFusion Java. Its API may
+change between releases to track the needs of the native crates that depend on
+it.
diff --git a/native/src/errors.rs b/native-common/src/errors.rs
similarity index 95%
rename from native/src/errors.rs
rename to native-common/src/errors.rs
index d926544..f9dbb03 100644
--- a/native/src/errors.rs
+++ b/native-common/src/errors.rs
@@ -96,8 +96,11 @@ fn classify(err: &DataFusionError) -> &'static str {
         }
         DataFusionError::IoError(_)
         | DataFusionError::ObjectStore(_)
-        | DataFusionError::ParquetError(_)
-        | DataFusionError::AvroError(_) => "org/apache/datafusion/IoException",
+        | DataFusionError::ParquetError(_) => "org/apache/datafusion/IoException",
+        // The AvroError variant only exists when DataFusion is built with its
+        // `avro` feature, forwarded by this crate's own `avro` feature.
+        #[cfg(feature = "avro")]
+        DataFusionError::AvroError(_) => "org/apache/datafusion/IoException",
         // ArrowError is a 21-variant grab bag -- only some of those variants
         // are actually IO-shaped. DivideByZero / ArithmeticOverflow / Compute
         // / Cast / InvalidArgument / Memory etc. are execution-time failures
@@ -161,7 +164,10 @@ fn throw(env: &mut JNIEnv, class: &str, message: &str) {
     let _ = env.throw_new(class, message);
 }
 
-fn panic_message(panic: &Box<dyn Any + Send>) -> String {
+/// Best-effort extraction of a panic payload's message. `catch_unwind` hands
+/// back a `Box<dyn Any>`; the payload is a `String` or `&str` for ordinary
+/// `panic!`/`unwrap` sites, anything else is opaque.
+pub fn panic_message(panic: &Box<dyn Any + Send>) -> String {
     if let Some(s) = panic.downcast_ref::<String>() {
         s.clone()
     } else if let Some(s) = panic.downcast_ref::<&str>() {
diff --git a/native-common/src/lib.rs b/native-common/src/lib.rs
new file mode 100644
index 0000000..ba47004
--- /dev/null
+++ b/native-common/src/lib.rs
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! JNI plumbing shared by this workspace's native crates (`datafusion-jni`
+//! and `datafusion-spark-bridge`, and through the latter every bridge
+//! cdylib): the error-to-Java-exception mapping, the per-cdylib Tokio
+//! runtime singleton, and the async-stream-to-`FFI_ArrowArrayStream`
+//! bridge.
+//!
+//! Each cdylib statically links its own copy of this rlib, so [`runtime`] is
+//! a per-cdylib singleton -- exactly the behaviour each crate had when this
+//! code lived inline. Nothing here is exported with `#[no_mangle]`, so
+//! linking this crate into several cdylibs loaded in one JVM cannot collide.
+
+pub mod errors;
+
+use std::panic::{catch_unwind, AssertUnwindSafe};
+use std::sync::OnceLock;
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::arrow::error::ArrowError;
+use datafusion::arrow::record_batch::RecordBatchReader;
+use datafusion::execution::SendableRecordBatchStream;
+use futures::StreamExt;
+use tokio::runtime::{Handle, Runtime};
+
+static RT: OnceLock<Runtime> = OnceLock::new();
+
+/// The cdylib-wide Tokio runtime; creates it with a no-op init hook if it does not exist yet.
+pub fn runtime() -> &'static Runtime {
+    runtime_with_init(|_| {})
+}
+
+/// Same singleton as [`runtime`], with a hook that runs at most once: only
+/// when this call is the one that creates the runtime. `datafusion-jni` uses
+/// it to install its runtime-metrics accumulator so the sampling baseline
+/// coincides with runtime start; if the runtime already exists (created via
+/// either entry point) the hook is dropped uncalled. Panics if creation fails.
+pub fn runtime_with_init(init: impl FnOnce(&Handle)) -> &'static Runtime {
+    RT.get_or_init(|| {
+        let rt = Runtime::new().expect("failed to create Tokio runtime");
+        init(rt.handle());
+        rt
+    })
+}
+
+/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous
+/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore
+/// the Java `ArrowReader`) consumes. Each call to `next()` drives one
+/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the
+/// executor pipeline plus a single in-flight batch.
+pub struct StreamingReader {
+    pub schema: SchemaRef, // handed back (cloned) by `RecordBatchReader::schema`
+    pub stream: SendableRecordBatchStream, // polled once per `next()` call
+}
+
+impl Iterator for StreamingReader {
+    type Item = Result<RecordBatch, ArrowError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Arrow's C ABI invokes this iterator through FFI_ArrowArrayStream's
+        // vtable, outside the JNI handler's try_unwrap_or_throw guard. A panic
+        // here (buggy UDF, arrow cast that panics, runtime poison) would
+        // unwind across C/FFI -- undefined behaviour. Catch it and yield it as
+        // an ArrowError item so the Java side sees a normal exception instead.
+        let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
+        match next {
+            Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
+            Err(panic) => {
+                let msg = errors::panic_message(&panic);
+                Some(Err(ArrowError::ExternalError(
+                    format!("panic in DataFrame stream: {msg}").into(),
+                )))
+            }
+        }
+    }
+}
+
+impl RecordBatchReader for StreamingReader {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}
diff --git a/native-ffi/Cargo.toml b/native-ffi/Cargo.toml
new file mode 100644
index 0000000..cd97d2b
--- /dev/null
+++ b/native-ffi/Cargo.toml
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-scan-ffi"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# Not published yet; this is the in-tree home of the plain-C scan ABI while it
+# stabilizes. The intent is for this surface to eventually live in DataFusion
+# proper (it has no JVM/JNI dependency), so keep it free of anything
+# Java-specific.
+publish = false
+
+[lib]
+# `cdylib`  -> the shippable plain-C shared library (`libdatafusion_scan_ffi`).
+# `rlib`    -> lets a downstream cdylib statically link this crate, register
+#              its own providers, and re-export the `df_scan_*` symbols; also
+#              gives `cargo test` a Rust harness that round-trips the ABI with
+#              no JVM in sight.
+crate-type = ["cdylib", "rlib"]
+
+[features]
+# A built-in in-memory provider builder registered under `datafusion.memory`,
+# used by the round-trip tests and handy as a reference builder. Off by default
+# so a production cdylib only carries the providers it registers itself.
+demo-providers = []
+
+[dependencies]
+# The arrow C Data / C Stream interface types are the entire data plane of this
+# ABI. `ffi` pulls in both `arrow::ffi` (FFI_ArrowSchema/Array) and
+# `arrow::ffi_stream` (FFI_ArrowArrayStream). Same crate+version DataFusion
+# links, so the types unify.
+arrow = { workspace = true }
+# `avro` enables AvroFormat for the listing provider; parquet/csv/json/arrow
+# formats are on by default.
+datafusion = { workspace = true, features = ["avro"] }
+# Pushed filters arrive as serialized `datafusion.LogicalExprNode` protobufs --
+# the same vocabulary `datafusion-ffi` already uses, so the encoder is shared
+# with any future Comet path.
+datafusion-proto = { workspace = true }
+futures = { workspace = true }
+prost = { workspace = true }
+tokio = { workspace = true }
+
+[dev-dependencies]
+# Round-trip tests import the produced FFI_ArrowArrayStream back into Rust via
+# the same C Stream interface a Java/Python/Go consumer would use.
+datafusion-scan-ffi = { path = ".", features = ["demo-providers"] }
+
+[build-dependencies]
+# Compiles scan_config.proto / scan_request.proto (and the per-format read
+# option messages they embed) into Rust so provider builders can decode the
+# `options` blob. Mirrors `native/build.rs`.
+prost-build = { workspace = true }
+protoc-bin-vendored = { workspace = true }
diff --git a/native-ffi/build.rs b/native-ffi/build.rs
new file mode 100644
index 0000000..a1be583
--- /dev/null
+++ b/native-ffi/build.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// Compiles the scan protos and tells Cargo about every input the script reads.
+fn main() {
+    // scan_config.proto's read-option messages import file_compression_type; all
+    // transitively-referenced files are compiled so `ScanConfig` has its types.
+    const PROTOS: &[&str] = &[
+        "../proto/scan_config.proto",
+        "../proto/scan_request.proto",
+        "../proto/file_compression_type.proto",
+        "../proto/csv_read_options.proto",
+        "../proto/json_read_options.proto",
+        "../proto/parquet_read_options.proto",
+        "../proto/avro_read_options.proto",
+        "../proto/arrow_read_options.proto",
+    ];
+    for p in PROTOS {
+        println!("cargo:rerun-if-changed={p}");
+    }
+    // PROTOC is read below, so declare it; a caller-provided value beats the vendored one.
+    println!("cargo:rerun-if-env-changed=PROTOC");
+    if std::env::var_os("PROTOC").is_none() {
+        let protoc = protoc_bin_vendored::protoc_bin_path().expect("vendored protoc not available");
+        std::env::set_var("PROTOC", protoc);
+    }
+    prost_build::compile_protos(PROTOS, &["../proto"]).expect("failed to compile protos");
+}
diff --git a/native-ffi/include/datafusion_scan.h b/native-ffi/include/datafusion_scan.h
new file mode 100644
index 0000000..afa6a2e
--- /dev/null
+++ b/native-ffi/include/datafusion_scan.h
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Plain-C scan ABI over the Arrow C Data / C Stream interface.
+//
+// The only "rich" types crossing this boundary are the standard Arrow C
+// structs `ArrowSchema` and `ArrowArrayStream` (from Arrow's abi.h), which any
+// Arrow implementation can produce/consume. Everything else is C primitives
+// and borrowed (ptr, len) views. No JVM/JNI types appear here, by design.
+
+#ifndef DATAFUSION_SCAN_H
+#define DATAFUSION_SCAN_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arrow/c/abi.h"  // struct ArrowSchema, struct ArrowArrayStream
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// --- Status codes ----------------------------------------------------------
+// 0 on success; nonzero classifies the failure. On error the call also writes a
+// NUL-terminated message to *out_err; release it with df_error_free, not free().
+typedef enum {
+  DF_OK = 0,
+  DF_INVALID_ARGUMENT = 1,  // required pointer was null, or a length/index invalid
+  DF_UNKNOWN_PROVIDER = 2,  // provider is not a registered builder name
+  DF_PROVIDER_BUILD = 3,    // the provider builder itself failed
+  DF_PLANNING = 4,          // projection, filter decode, or physical planning failed
+  DF_EXECUTION = 5,         // stream execution setup failed
+  DF_PANIC = 6,             // a Rust panic was caught at the boundary
+  DF_INTERNAL = 7           // anything not covered above
+} DfStatus;
+
+// --- Borrowed input views (caller owns the memory) -------------------------
+typedef struct {
+  const uint8_t* ptr;  // UTF-8, not NUL-terminated; may be null if len == 0
+  size_t len;          // length in bytes
+} DfStr;
+
+typedef struct {
+  const uint8_t* ptr;  // may be null if len == 0
+  size_t len;          // length in bytes
+} DfBytes;
+
+typedef struct {
+  DfStr key;    // session config override key (UTF-8)
+  DfStr value;  // session config override value (UTF-8)
+} DfKeyValue;
+
+// Opaque planned-scan handle.
+typedef struct DfScanHandle DfScanHandle;
+
+// --- Lifecycle / versioning ------------------------------------------------
+
+// ABI major version; compare before any other call.
+uint64_t df_scan_abi_version(void);
+
+// Free a message previously written to an out_err argument (null-safe).
+void df_error_free(char* err);
+
+// --- Scan API --------------------------------------------------------------
+
+// Probe a provider's output schema into the caller-allocated out_schema.
+int32_t df_scan_schema(DfStr provider, DfBytes options, DfBytes partition,
+                       struct ArrowSchema* out_schema, char** out_err);
+
+// Plan a scan. On success writes an owned handle to *out_handle (release with
+// df_scan_close). projection is an array of column-name DfStr (empty = all);
+// filters is an array of serialized datafusion.LogicalExprNode DfBytes;
+// target_partitions / batch_size <= 0 keep DataFusion defaults; limit < 0 means
+// no row limit.
+int32_t df_scan_create(DfStr provider, DfBytes options, DfBytes partition,
+                       int32_t target_partitions, int32_t batch_size, int64_t limit,
+                       const DfKeyValue* config_overrides, size_t config_overrides_len,
+                       const DfStr* projection, size_t projection_len,
+                       const DfBytes* filters, size_t filters_len,
+                       DfScanHandle** out_handle, char** out_err);
+
+// Output partition count of the planned scan.
+int32_t df_scan_partition_count(const DfScanHandle* handle, int32_t* out_count,
+                                char** out_err);
+
+// Execute partition `partition` (>= 0) into the caller-allocated Arrow C Stream.
+int32_t df_scan_execute_partition(const DfScanHandle* handle, int32_t partition,
+                                  struct ArrowArrayStream* out_stream, char** out_err);
+
+// Execute the whole plan as a single coalesced Arrow C Stream.
+int32_t df_scan_execute(const DfScanHandle* handle,
+                        struct ArrowArrayStream* out_stream, char** out_err);
+
+// Drop a planned scan (null-safe). Must not race an in-flight execute on the
+// same handle.
+void df_scan_close(DfScanHandle* handle);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // DATAFUSION_SCAN_H
diff --git a/native-ffi/src/abi.rs b/native-ffi/src/abi.rs
new file mode 100644
index 0000000..f037ad5
--- /dev/null
+++ b/native-ffi/src/abi.rs
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The plain-C front door: `extern "C"` entry points over C and Arrow C types.
+//!
+//! No `JNIEnv`, no JVM types, no name mangling -- the exported symbols are
+//! `df_scan_*` / `df_error_*` and the only "rich" types that cross are the
+//! standard Arrow C Data (`ArrowSchema`) and C Stream (`ArrowArrayStream`)
+//! structs. A Java consumer reaches these through a ~2-method JNI shim or the
+//! JDK 22+ FFM API; Python/Go/R/Rust reach them directly.
+//!
+//! Convention: every fallible call returns `0` on success and a nonzero
+//! [`DfStatus`](crate::error::DfStatus) on failure, writing a Rust-allocated
+//! message to `*out_err` (freed only via [`df_error_free`]). Each is wrapped in
+//! `catch_unwind` so a Rust panic becomes [`DfStatus::Panic`] instead of
+//! unwinding across the C boundary (UB).
+
+use std::ffi::c_char;
+use std::os::raw::c_int;
+use std::panic::{catch_unwind, AssertUnwindSafe};
+
+use datafusion::arrow::ffi::FFI_ArrowSchema;
+use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream;
+
+use crate::error::{finish, report, DfStatus, ScanError, ScanResult};
+use crate::ffi_types::{array, DfBytes, DfKeyValue, DfStr};
+use crate::reader::panic_message;
+use crate::scan::{self, ScanHandle, ScanRequest};
+
+/// Opaque handle to a planned scan. [`df_scan_create`] boxes it and hands out
+/// the raw pointer; [`df_scan_close`] frees it. Consumers never dereference it.
+pub struct DfScanHandle {
+    inner: ScanHandle,
+}
+
+/// Execute `body` behind `catch_unwind` and collapse the outcome into a status
+/// code: a normal result goes through [`finish`], while a caught panic is
+/// reported as [`DfStatus::Panic`] carrying the panic message.
+///
+/// # Safety
+/// `out_err` must be null or a writable `*mut *mut c_char`.
+unsafe fn guard(out_err: *mut *mut c_char, body: impl FnOnce() -> ScanResult<()>) -> c_int {
+    let outcome = catch_unwind(AssertUnwindSafe(body)).unwrap_or_else(|payload| {
+        let detail = panic_message(&payload);
+        Err(ScanError::new(
+            DfStatus::Panic,
+            format!("panic in datafusion-scan-ffi: {detail}"),
+        ))
+    });
+    finish(out_err, outcome)
+}
+
+/// Returns [`crate::ABI_VERSION`], the ABI major version. A consumer compares it
+/// against the value it was compiled for before calling anything else.
+#[no_mangle]
+pub extern "C" fn df_scan_abi_version() -> u64 {
+    crate::ABI_VERSION
+}
+
+/// Free an error string previously written to an `out_err` argument. Safe to
+/// call with null.
+///
+/// # Safety
+/// `err` must be null or a pointer previously returned through `out_err` by
+/// one of the `df_scan_*` calls, and must not be used afterwards.
+#[no_mangle]
+pub unsafe extern "C" fn df_error_free(err: *mut c_char) {
+    if !err.is_null() {
+        drop(std::ffi::CString::from_raw(err));
+    }
+}
+
+/// Resolve the named provider's output schema and export it as an Arrow C
+/// Schema into the caller-allocated `out_schema`. On failure `out_schema` is
+/// left untouched.
+///
+/// # Safety
+/// All pointer args follow the documented `(ptr, len)` borrow contract;
+/// `out_schema` must point to a writable, uninitialized `ArrowSchema`.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_schema(
+    provider: DfStr,
+    options: DfBytes,
+    partition: DfBytes,
+    out_schema: *mut FFI_ArrowSchema,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        if out_schema.is_null() {
+            return Err(ScanError::invalid_argument("out_schema is null"));
+        }
+        let provider_name = provider.as_str()?;
+        let arrow_schema = scan::schema(provider_name, options.as_slice(), partition.as_slice())?;
+        out_schema.write(FFI_ArrowSchema::try_from(arrow_schema.as_ref())?);
+        Ok(())
+    })
+}
+
+/// Plan a scan, writing an owned [`DfScanHandle`] to `*out_handle` on success;
+/// the caller must release it with [`df_scan_close`].
+///
+/// `config_overrides`: session-config [`DfKeyValue`] pairs. `projection`:
+/// column-name [`DfStr`]s (empty selects all). `filters`: serialized
+/// `datafusion.LogicalExprNode` [`DfBytes`]. `limit`: negative means no limit.
+///
+/// # Safety
+/// Array args follow the `(ptr, len)` borrow contract; `out_handle` must be a
+/// writable `*mut *mut DfScanHandle`.
+#[no_mangle]
+#[allow(clippy::too_many_arguments)]
+pub unsafe extern "C" fn df_scan_create(
+    provider: DfStr,
+    options: DfBytes,
+    partition: DfBytes,
+    target_partitions: c_int,
+    batch_size: c_int,
+    limit: i64,
+    config_overrides: *const DfKeyValue,
+    config_overrides_len: usize,
+    projection: *const DfStr,
+    projection_len: usize,
+    filters: *const DfBytes,
+    filters_len: usize,
+    out_handle: *mut *mut DfScanHandle,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        if out_handle.is_null() {
+            return Err(ScanError::invalid_argument("out_handle is null"));
+        }
+        let provider = provider.as_str()?;
+        // Pre-size from the checked slices; a null ptr + stray `*_len` must not allocate.
+        let kvs = array(config_overrides, config_overrides_len);
+        let mut overrides = Vec::with_capacity(kvs.len());
+        for kv in kvs {
+            overrides.push((kv.key.as_str()?.to_string(), kv.value.as_str()?.to_string()));
+        }
+        let names = array(projection, projection_len);
+        let mut cols = Vec::with_capacity(names.len());
+        for s in names {
+            cols.push(s.as_str()?.to_string());
+        }
+        let blobs = array(filters, filters_len);
+        let mut filter_bytes = Vec::with_capacity(blobs.len());
+        for b in blobs {
+            filter_bytes.push(b.as_slice().to_vec());
+        }
+        let handle = scan::create(ScanRequest {
+            provider,
+            options: options.as_slice(),
+            partition: partition.as_slice(),
+            target_partitions,
+            batch_size,
+            limit: usize::try_from(limit).ok(), // negative or > usize::MAX: no limit
+            config_overrides: overrides,
+            projection: cols,
+            filters: filter_bytes,
+        })?;
+        let boxed = Box::new(DfScanHandle { inner: handle });
+        std::ptr::write(out_handle, Box::into_raw(boxed));
+        Ok(())
+    })
+}
+
+/// Write the planned scan's output partition count (range-checked) to `*out_count`.
+///
+/// # Safety
+/// `handle` must be live (from [`df_scan_create`]); `out_count` must be writable.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_partition_count(
+    handle: *const DfScanHandle,
+    out_count: *mut c_int,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        let h = handle
+            .as_ref()
+            .ok_or_else(|| ScanError::invalid_argument("scan handle is null"))?;
+        if out_count.is_null() {
+            return Err(ScanError::invalid_argument("out_count is null"));
+        }
+        c_int::try_from(h.inner.partition_count())
+            .map(|count| std::ptr::write(out_count, count))
+            .map_err(|_| ScanError::new(DfStatus::Internal, "partition count exceeds i32"))
+    })
+}
+
+/// Run a single partition of the planned scan and export the resulting reader
+/// as an `FFI_ArrowArrayStream` written into the caller-allocated `out_stream`.
+/// A negative `partition` is rejected as an invalid argument. The consumer
+/// imports the stream with its Arrow C Stream importer (e.g. arrow-java
+/// `Data.importArrayStream`).
+///
+/// # Safety
+/// `handle` live; `out_stream` points to a writable, uninitialized `ArrowArrayStream`.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_execute_partition(
+    handle: *const DfScanHandle,
+    partition: c_int,
+    out_stream: *mut FFI_ArrowArrayStream,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        let planned = match handle.as_ref() {
+            Some(planned) => planned,
+            None => return Err(ScanError::invalid_argument("scan handle is null")),
+        };
+        if out_stream.is_null() {
+            return Err(ScanError::invalid_argument("out_stream is null"));
+        }
+        let index = usize::try_from(partition)
+            .map_err(|_| ScanError::invalid_argument("partition index is negative"))?;
+        let reader = planned.inner.execute_partition(index)?;
+        out_stream.write(FFI_ArrowArrayStream::new(Box::new(reader)));
+        Ok(())
+    })
+}
+
+/// Run the whole planned scan as one coalesced stream written to `out_stream`.
+///
+/// # Safety
+/// As [`df_scan_execute_partition`].
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_execute(
+    handle: *const DfScanHandle,
+    out_stream: *mut FFI_ArrowArrayStream,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        let planned = match handle.as_ref() {
+            Some(planned) => planned,
+            None => return Err(ScanError::invalid_argument("scan handle is null")),
+        };
+        if out_stream.is_null() {
+            return Err(ScanError::invalid_argument("out_stream is null"));
+        }
+        let reader = planned.inner.execute_all()?;
+        out_stream.write(FFI_ArrowArrayStream::new(Box::new(reader)));
+        Ok(())
+    })
+}
+
+/// Drop a planned scan; passing null is a no-op. The consumer must make sure
+/// no execute call on this handle is still in flight.
+///
+/// # Safety
+/// `handle` must be null or a live pointer from [`df_scan_create`], unused after.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_close(handle: *mut DfScanHandle) {
+    if handle.is_null() {
+        return;
+    }
+    drop(Box::from_raw(handle));
+}
diff --git a/native-ffi/src/demo.rs b/native-ffi/src/demo.rs
new file mode 100644
index 0000000..ca27f3b
--- /dev/null
+++ b/native-ffi/src/demo.rs
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! A reference in-memory provider builder, gated behind the `demo-providers`
+//! feature. Registered under `datafusion.memory`; the `options` bytes are
+//! ignored. Used by the round-trip tests and as a minimal example of what a
+//! real consumer's builder looks like.
+
+use std::sync::Arc;
+
+use datafusion::arrow::array::{Int64Array, StringArray};
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::catalog::TableProvider;
+use datafusion::datasource::MemTable;
+use datafusion::prelude::SessionContext;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+use crate::registry::register_provider;
+
+/// Registered builder name for the demo provider.
+pub const NAME: &str = "datafusion.memory";
+
+/// Register the demo provider. Call once at startup.
+pub fn register() {
+    register_provider(NAME, build);
+}
+
+/// Builds the demo table: columns `id: Int64` (non-null) and `name: Utf8`
+/// (nullable), with rows split over two single-batch partitions so that
+/// partition-count behavior is observable. All three arguments are ignored.
+fn build(
+    _ctx: &SessionContext,
+    _options: &[u8],
+    _partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>> {
+    let fields = vec![
+        Field::new("id", DataType::Int64, false),
+        Field::new("name", DataType::Utf8, true),
+    ];
+    let schema = Arc::new(Schema::new(fields));
+
+    // Builds one record batch over `schema` from parallel id/name columns.
+    let make_batch = |ids: Vec<i64>, names: Vec<&str>| -> ScanResult<RecordBatch> {
+        let id_col = Int64Array::from(ids);
+        let name_col = StringArray::from(names);
+        Ok(RecordBatch::try_new(schema.clone(), vec![Arc::new(id_col), Arc::new(name_col)])?)
+    };
+
+    // Each inner Vec is one partition, so the table reports two partitions.
+    let partitions = vec![
+        vec![make_batch(vec![1, 2, 3], vec!["a", "b", "c"])?],
+        vec![make_batch(vec![4, 5], vec!["d", "e"])?],
+    ];
+    let table = MemTable::try_new(schema, partitions)
+        .map_err(|e| ScanError::new(DfStatus::ProviderBuild, e.to_string()))?;
+    Ok(Arc::new(table))
+}
diff --git a/native-ffi/src/error.rs b/native-ffi/src/error.rs
new file mode 100644
index 0000000..71d8164
--- /dev/null
+++ b/native-ffi/src/error.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Error model for the C ABI.
+//!
+//! Rust-internal code works with [`ScanError`]; the `extern "C"` layer turns it
+//! into an `i32` [`DfStatus`] return plus a heap-allocated message string. No
+//! Rust error type ever crosses the boundary -- only a code and UTF-8 bytes.
+
+use std::ffi::{c_char, CString};
+use std::os::raw::c_int;
+
+use datafusion::arrow::error::ArrowError;
+use datafusion::error::DataFusionError;
+
+/// Status codes returned by every fallible `df_scan_*` call. `0` is success;
+/// the rest classify the failure coarsely so a consumer can branch without
+/// parsing the message. Stable across an `ABI_VERSION`.
+#[repr(i32)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DfStatus {
+    Ok = 0,
+    /// A required pointer argument was null, or a length/index was invalid.
+    InvalidArgument = 1,
+    /// `provider` is not a registered builder name.
+    UnknownProvider = 2,
+    /// The provider builder itself failed.
+    ProviderBuild = 3,
+    /// Planning failed (projection, filter decode, physical planning).
+    Planning = 4,
+    /// Stream execution setup failed.
+    Execution = 5,
+    /// A Rust panic was caught at the boundary.
+    Panic = 6,
+    /// Anything not covered above.
+    Internal = 7,
+}
+
+/// Internal error carrying a status class and a human-readable message.
+#[derive(Debug)]
+pub struct ScanError {
+    pub status: DfStatus,
+    pub message: String,
+}
+
+impl ScanError {
+    pub fn new(status: DfStatus, message: impl Into<String>) -> Self {
+        Self {
+            status,
+            message: message.into(),
+        }
+    }
+
+    pub fn invalid_argument(message: impl Into<String>) -> Self {
+        Self::new(DfStatus::InvalidArgument, message)
+    }
+}
+
+impl From<DataFusionError> for ScanError {
+    fn from(e: DataFusionError) -> Self {
+        Self::new(DfStatus::Planning, e.to_string())
+    }
+}
+
+impl From<ArrowError> for ScanError {
+    fn from(e: ArrowError) -> Self {
+        Self::new(DfStatus::Internal, e.to_string())
+    }
+}
+
+impl From<prost::DecodeError> for ScanError {
+    fn from(e: prost::DecodeError) -> Self {
+        Self::new(
+            DfStatus::Planning,
+            format!("failed to decode pushed filter as LogicalExprNode: {e}"),
+        )
+    }
+}
+
+pub type ScanResult<T> = Result<T, ScanError>;
+
+/// Convert `err` into its C representation: the returned `c_int` is the status
+/// code, and the message is stored in `*out_err` as a freshly allocated,
+/// NUL-terminated C string that the caller later frees via `df_error_free`.
+///
+/// A null `out_err` is allowed: the message is then dropped and only the code
+/// is returned. Should the message still fail to convert, `*out_err` is set
+/// to null.
+///
+/// # Safety
+/// `out_err` must be null or point to a writable `*mut c_char`.
+pub unsafe fn report(out_err: *mut *mut c_char, err: ScanError) -> c_int {
+    if !out_err.is_null() {
+        // An interior NUL would make `CString::new` fail; substitute U+FFFD first.
+        let text = err.message.replace('\0', "\u{fffd}");
+        *out_err = CString::new(text).map_or(std::ptr::null_mut(), CString::into_raw);
+    }
+    err.status as c_int
+}
+
+/// Map a `ScanResult<()>` to the C status code: `DfStatus::Ok` on success (with
+/// `out_err` left untouched), otherwise the code from [`report`].
+///
+/// # Safety
+/// See [`report`].
+pub unsafe fn finish(out_err: *mut *mut c_char, result: ScanResult<()>) -> c_int {
+    if let Err(e) = result {
+        return report(out_err, e);
+    }
+    DfStatus::Ok as c_int
+}
diff --git a/native-ffi/src/ffi_types.rs b/native-ffi/src/ffi_types.rs
new file mode 100644
index 0000000..a892a62
--- /dev/null
+++ b/native-ffi/src/ffi_types.rs
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Borrowed C views passed *into* the ABI.
+//!
+//! These are non-owning `(ptr, len)` pairs: the caller owns the memory and
+//! keeps it valid for the duration of the call. Nothing here is allocated or
+//! freed by Rust. Using explicit `(ptr, len)` slices (rather than
+//! NUL-terminated strings) means the surface is FFM-friendly and binary-safe.
+
+use std::slice;
+
+use crate::error::{ScanError, ScanResult};
+
+/// A borrowed UTF-8 string slice. Not NUL-terminated.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct DfStr {
+    pub ptr: *const u8,
+    pub len: usize,
+}
+
+/// A borrowed byte slice.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct DfBytes {
+    pub ptr: *const u8,
+    pub len: usize,
+}
+
+/// A borrowed `(key, value)` UTF-8 pair, for session config overrides.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct DfKeyValue {
+    pub key: DfStr,
+    pub value: DfStr,
+}
+
+impl DfStr {
+    /// Borrow the view as `&str`; bytes that are not valid UTF-8 yield an
+    /// invalid-argument error.
+    /// # Safety
+    /// `ptr` must be null or point to `len` bytes that stay alive for the borrow.
+    pub unsafe fn as_str(&self) -> ScanResult<&str> {
+        std::str::from_utf8(self.as_bytes())
+            .map_err(|e| ScanError::invalid_argument(format!("argument is not valid UTF-8: {e}")))
+    }
+
+    /// # Safety
+    /// As [`DfStr::as_str`]. A null `ptr` or zero `len` yields an empty slice.
+    pub unsafe fn as_bytes(&self) -> &[u8] {
+        if !self.ptr.is_null() && self.len != 0 {
+            slice::from_raw_parts(self.ptr, self.len)
+        } else {
+            &[]
+        }
+    }
+}
+
+impl DfBytes {
+    /// # Safety
+    /// A non-null `ptr` must point to `len` bytes alive for the borrow.
+    pub unsafe fn as_slice(&self) -> &[u8] {
+        if !self.ptr.is_null() && self.len != 0 {
+            slice::from_raw_parts(self.ptr, self.len)
+        } else {
+            &[]
+        }
+    }
+}
+
+/// Borrow a `(ptr, len)` array argument as a slice; null `ptr` or `len == 0` is empty.
+///
+/// # Safety
+/// A non-null `ptr` must point to `len` valid `T` that stay alive for the borrow.
+pub unsafe fn array<'a, T>(ptr: *const T, len: usize) -> &'a [T] {
+    match len {
+        0 => &[],
+        _ if ptr.is_null() => &[],
+        _ => slice::from_raw_parts(ptr, len),
+    }
+}
diff --git a/native-ffi/src/lib.rs b/native-ffi/src/lib.rs
new file mode 100644
index 0000000..0f83e74
--- /dev/null
+++ b/native-ffi/src/lib.rs
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! A plain-C scan ABI over the Arrow C Data / C Stream interface.
+//!
+//! This crate exposes a DataFusion [`TableProvider`](datafusion::catalog::TableProvider)
+//! scan as a set of `extern "C"` entry points that speak only C types and the
+//! Arrow C Data interface. There is **no JVM/JNI dependency**: the front door
+//! is callable from Java (via a thin JNI shim or the JDK 22+ FFM API), but also
+//! from Python (cffi/ctypes), Go (cgo), R, or another Rust crate. That is the
+//! property that lets the surface live close to DataFusion proper and get
+//! reviewed by a wider audience -- the request on
+//! <https://github.com/apache/datafusion-java/pull/104>.
+//!
+//! # Shape
+//!
+//! Providers are *compiled into* the final cdylib ("approach A"): a consumer
+//! links this crate as an `rlib`, [`register_provider`]s its builders by name,
+//! and the `df_scan_*` symbols are exported from the resulting shared library.
+//! The data plane never crosses as serialized batches -- each scanned
+//! partition is handed back as a standard `FFI_ArrowArrayStream` the consumer
+//! imports zero-copy.
+//!
+//! # The ABI
+//!
+//! See `include/datafusion_scan.h` for the C header. In brief:
+//!
+//! - [`abi::df_scan_schema`]            -- probe the output schema (Arrow C Schema)
+//! - [`abi::df_scan_create`]            -- plan a scan, returns an opaque handle
+//! - [`abi::df_scan_partition_count`]   -- number of output partitions
+//! - [`abi::df_scan_execute_partition`] -- one partition  -> Arrow C Stream
+//! - [`abi::df_scan_execute`]           -- whole plan      -> Arrow C Stream
+//! - [`abi::df_scan_close`]             -- drop the handle
+//! - [`abi::df_error_free`]             -- free an error string
+//! - [`abi::df_scan_abi_version`]       -- ABI major version for compatibility
+//!
+//! Every fallible call returns `0` on success and a nonzero
+//! [`error::DfStatus`] code on failure, setting `*out_err` to a Rust-allocated,
+//! NUL-terminated message the caller frees with `df_error_free` (never `free`).
+
+pub mod abi;
+pub mod error;
+pub mod ffi_types;
+pub mod listing;
+pub mod reader;
+pub mod registry;
+pub mod runtime;
+pub mod scan;
+
+/// Generated protobuf types for the scan config / request wire formats
+/// (`proto/scan_config.proto`, `proto/scan_request.proto`). The `ScanConfig`
+/// blob is decoded by provider builders; `ScanRequest` is the engine-side
+/// staging object exploded into the C call's typed arguments.
+pub mod proto {
+    include!(concat!(env!("OUT_DIR"), "/datafusion_java.rs"));
+}
+
+#[cfg(feature = "demo-providers")]
+pub mod demo;
+
+pub use registry::register_provider;
+
+/// Major version of this ABI. Bumped on any breaking change to a `df_scan_*`
+/// signature or to the meaning of its arguments. Consumers compare against the
+/// value they were built for via [`abi::df_scan_abi_version`].
+pub const ABI_VERSION: u64 = 1;
diff --git a/native-ffi/src/listing.rs b/native-ffi/src/listing.rs
new file mode 100644
index 0000000..5b8aed6
--- /dev/null
+++ b/native-ffi/src/listing.rs
@@ -0,0 +1,226 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! A real file-backed provider builder, registered under `datafusion.listing`.
+//!
+//! Decodes the [`ScanConfig`](crate::proto::ScanConfig) blob into a DataFusion
+//! [`ListingTable`] over one or more paths read with a single file format.
+//! Demonstrates a builder that needs the session context: when no explicit
+//! schema is supplied it infers one from the data (and the context's object
+//! store registry resolves the paths).
+//!
+//! Object stores for remote URIs (s3://, gs://, ...) must be registered on the
+//! context by the embedding cdylib before a scan runs; the default context
+//! resolves local paths out of the box.
+
+use std::io::Cursor;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{Schema, SchemaRef};
+use datafusion::arrow::ipc::reader::StreamReader;
+use datafusion::catalog::TableProvider;
+use datafusion::datasource::file_format::arrow::ArrowFormat;
+use datafusion::datasource::file_format::avro::AvroFormat;
+use datafusion::datasource::file_format::csv::CsvFormat;
+use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
+use datafusion::datasource::file_format::json::JsonFormat;
+use datafusion::datasource::file_format::parquet::ParquetFormat;
+use datafusion::datasource::file_format::FileFormat;
+use datafusion::datasource::listing::{
+    ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
+};
+use datafusion::prelude::SessionContext;
+use prost::Message;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+use crate::proto::{listing_source, scan_config, FileCompressionType as ProtoCompression};
+use crate::proto::{ListingSource, ScanConfig};
+use crate::registry::register_provider;
+use crate::runtime::handle;
+
+/// Registered builder name for the listing provider.
+pub const NAME: &str = "datafusion.listing";
+
+/// Register [`build`] under [`NAME`] in the provider registry. Call once at startup.
+pub fn register() {
+    register_provider(NAME, build);
+}
+
+/// Provider builder for [`NAME`]. Decodes `options` as a [`ScanConfig`], checks
+/// that it carries a listing source with at least one path, and assembles a
+/// [`ListingTable`] from it. The schema comes from `schema_ipc` when present
+/// and is otherwise inferred through `ctx`. `_partition` is not consulted.
+fn build(
+    ctx: &SessionContext,
+    options: &[u8],
+    _partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>> {
+    let decoded = match ScanConfig::decode(options) {
+        Ok(cfg) => cfg,
+        Err(e) => {
+            return Err(ScanError::new(
+                DfStatus::ProviderBuild,
+                format!("failed to decode ScanConfig: {e}"),
+            ))
+        }
+    };
+
+    // Only the listing arm of the source oneof is meaningful to this builder.
+    let source = match decoded.source {
+        Some(scan_config::Source::Listing(inner)) => inner,
+        other => {
+            let reason = match other {
+                Some(_) => "datafusion.listing requires a listing source, got custom bytes",
+                None => "datafusion.listing requires a listing source, none set",
+            };
+            return Err(ScanError::new(DfStatus::ProviderBuild, reason));
+        }
+    };
+
+    if source.paths.is_empty() {
+        return Err(ScanError::new(
+            DfStatus::ProviderBuild,
+            "listing source has no paths",
+        ));
+    }
+
+    // Parse every path; the first one that fails aborts the build.
+    let mut urls = Vec::with_capacity(source.paths.len());
+    for raw in &source.paths {
+        let url = ListingTableUrl::parse(raw).map_err(|e| {
+            ScanError::new(DfStatus::ProviderBuild, format!("invalid path {raw:?}: {e}"))
+        })?;
+        urls.push(url);
+    }
+
+    let read_options = listing_options(&source)?;
+    let base = ListingTableConfig::new_with_multi_paths(urls).with_listing_options(read_options);
+
+    let resolved = if let Some(ipc) = &source.schema_ipc {
+        base.with_schema(schema_from_ipc(ipc)?)
+    } else {
+        // No explicit schema: infer it from the data, reading through the
+        // context's state (and thus its object store registry).
+        handle()
+            .block_on(base.infer_schema(&ctx.state()))
+            .map_err(|e| {
+                ScanError::new(
+                    DfStatus::ProviderBuild,
+                    format!("failed to infer listing schema: {e}"),
+                )
+            })?
+    };
+
+    ListingTable::try_new(resolved)
+        .map(|table| Arc::new(table) as Arc<dyn TableProvider>)
+        .map_err(|e| ScanError::new(DfStatus::ProviderBuild, e.to_string()))
+}
+
+/// Translate the listing source's format oneof into DataFusion
+/// [`ListingOptions`]. Each arm picks the matching [`FileFormat`], applies the
+/// read options its proto message carries, and resolves the file extension
+/// (the configured one, or the format's conventional suffix when empty).
+/// A source without a format is rejected.
+fn listing_options(listing: &ListingSource) -> ScanResult<ListingOptions> {
+    use listing_source::Format;
+
+    let Some(chosen) = &listing.format else {
+        return Err(ScanError::new(
+            DfStatus::ProviderBuild,
+            "listing source has no file format",
+        ));
+    };
+
+    let (file_format, ext): (Arc<dyn FileFormat>, &str) = match chosen {
+        Format::Csv(csv) => {
+            // Validate the single-byte options first, in the order the
+            // builder applies them, so the first bad value is the one reported.
+            let delimiter = byte(csv.delimiter, b',')?;
+            let quote = byte(csv.quote, b'"')?;
+            let terminator = csv.terminator.map(|t| byte(t, b'\n')).transpose()?;
+            let escape = csv.escape.map(|e| byte(e, b'\\')).transpose()?;
+            let comment = csv.comment.map(|c| byte(c, b'#')).transpose()?;
+
+            let mut reader = CsvFormat::default()
+                .with_has_header(csv.has_header)
+                .with_delimiter(delimiter)
+                .with_quote(quote)
+                .with_newlines_in_values(csv.newlines_in_values.unwrap_or(false))
+                .with_file_compression_type(compression(csv.file_compression_type));
+            // Optional bytes are applied only when the field was present.
+            if terminator.is_some() {
+                reader = reader.with_terminator(terminator);
+            }
+            if escape.is_some() {
+                reader = reader.with_escape(escape);
+            }
+            if comment.is_some() {
+                reader = reader.with_comment(comment);
+            }
+            (Arc::new(reader), extension(&csv.file_extension, ".csv"))
+        }
+        Format::Json(json) => {
+            let codec = compression(json.file_compression_type);
+            (
+                Arc::new(JsonFormat::default().with_file_compression_type(codec)),
+                extension(&json.file_extension, ".json"),
+            )
+        }
+        // Parquet read tuning (pruning / metadata hints) is applied through
+        // session config at scan time, not on the format here.
+        Format::Parquet(parquet) => (
+            Arc::new(ParquetFormat::default()),
+            extension(&parquet.file_extension, ".parquet"),
+        ),
+        Format::Avro(avro) => (
+            Arc::new(AvroFormat),
+            extension(&avro.file_extension, ".avro"),
+        ),
+        Format::Arrow(arrow) => (
+            Arc::new(ArrowFormat),
+            extension(&arrow.file_extension, ".arrow"),
+        ),
+    };
+
+    Ok(ListingOptions::new(file_format).with_file_extension(ext.to_string()))
+}
+
+/// Decode a single-byte option that travels over the wire as a `uint32`.
+/// `0` is treated as "unset" and yields `default`; `1..=255` is returned as
+/// is; anything larger is rejected as an invalid argument.
+fn byte(value: u32, default: u8) -> ScanResult<u8> {
+    match value {
+        0 => Ok(default),
+        1..=255 => Ok(value as u8),
+        _ => Err(ScanError::invalid_argument(format!(
+            "byte option {value} exceeds 255"
+        ))),
+    }
+}
+
+/// Pick the file extension to use: `configured` unless it is empty, in which
+/// case `default`.
+fn extension<'a>(configured: &'a str, default: &'a str) -> &'a str {
+    match configured {
+        "" => default,
+        explicit => explicit,
+    }
+}
+
+/// Map the proto compression enum (as its raw `i32`) to DataFusion's
+/// [`FileCompressionType`]. Values that are not a known enum member, as well
+/// as the unspecified / uncompressed members, map to `UNCOMPRESSED`.
+fn compression(value: i32) -> FileCompressionType {
+    let Ok(codec) = ProtoCompression::try_from(value) else {
+        return FileCompressionType::UNCOMPRESSED;
+    };
+    match codec {
+        ProtoCompression::Gzip => FileCompressionType::GZIP,
+        ProtoCompression::Bzip2 => FileCompressionType::BZIP2,
+        ProtoCompression::Xz => FileCompressionType::XZ,
+        ProtoCompression::Zstd => FileCompressionType::ZSTD,
+        _ => FileCompressionType::UNCOMPRESSED,
+    }
+}
+
+/// Extract the schema from Arrow IPC stream bytes: a schema message,
+/// optionally followed by zero batches (the shape `StreamWriter::finish`
+/// produces). Only the stream header is opened; no batches are read here.
+fn schema_from_ipc(bytes: &[u8]) -> ScanResult<SchemaRef> {
+    match StreamReader::try_new(Cursor::new(bytes), None) {
+        Ok(reader) => {
+            let owned = Schema::clone(reader.schema().as_ref());
+            Ok(Arc::new(owned))
+        }
+        Err(e) => Err(ScanError::new(
+            DfStatus::ProviderBuild,
+            format!("failed to read schema_ipc: {e}"),
+        )),
+    }
+}
diff --git a/native-ffi/src/reader.rs b/native-ffi/src/reader.rs
new file mode 100644
index 0000000..445668e
--- /dev/null
+++ b/native-ffi/src/reader.rs
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Bridge from DataFusion's async stream to the synchronous
+//! [`RecordBatchReader`] that `FFI_ArrowArrayStream` pulls.
+
+use std::panic::{catch_unwind, AssertUnwindSafe};
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::arrow::error::ArrowError;
+use datafusion::arrow::record_batch::RecordBatchReader;
+use datafusion::execution::SendableRecordBatchStream;
+use futures::StreamExt;
+
+use crate::runtime::runtime;
+
+/// Wraps a [`SendableRecordBatchStream`] as a [`RecordBatchReader`]. Each
+/// `next()` drives one `block_on(stream.next())`, so memory stays bounded by
+/// the pipeline plus a single in-flight batch.
+pub struct StreamingReader {
+    pub schema: SchemaRef,
+    pub stream: SendableRecordBatchStream,
+}
+
+impl Iterator for StreamingReader {
+    type Item = Result<RecordBatch, ArrowError>;
+
+    /// Block on the wrapped stream for its next batch. DataFusion errors and
+    /// caught panics are both surfaced as `ArrowError::ExternalError`; `None`
+    /// is returned when the stream is exhausted.
+    fn next(&mut self) -> Option<Self::Item> {
+        // Arrow's C Stream vtable calls this from the *consumer's* thread,
+        // outside any guard. A panic unwinding across the C boundary is UB, so
+        // catch it and surface as an ArrowError -- the consumer sees a normal
+        // stream error (mapped to an exception on the Java side).
+        let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
+        match next {
+            Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
+            // Pass the payload itself (`&*panic`). A plain `&panic` is a
+            // `&Box<dyn Any + Send>`, which is unsize-coerced with the `Box`
+            // as the concrete `Any` type, so the `&str` / `String` downcasts
+            // in `panic_message` would never match and the message was lost.
+            Err(panic) => Some(Err(ArrowError::ExternalError(
+                format!("panic in DataFusion stream: {}", panic_message(&*panic)).into(),
+            ))),
+        }
+    }
+}
+
+impl RecordBatchReader for StreamingReader {
+    /// Schema of the batches this reader yields (a reference-count bump, not
+    /// a copy of the schema).
+    fn schema(&self) -> SchemaRef {
+        SchemaRef::clone(&self.schema)
+    }
+}
+
+/// Best-effort extraction of a panic payload's message.
+///
+/// Recognizes the two payload types `panic!` produces (`&'static str` and
+/// `String`); anything else yields `"unknown panic"`.
+///
+/// Also accepts a reference to the *boxed* payload: passing `&payload` where
+/// `payload: Box<dyn Any + Send>` (the value `catch_unwind` returns) is
+/// unsize-coerced with the `Box` itself as the concrete type, so that layer is
+/// looked through before the message downcasts are attempted.
+pub fn panic_message(panic: &(dyn std::any::Any + Send)) -> String {
+    if let Some(inner) = panic.downcast_ref::<Box<dyn std::any::Any + Send>>() {
+        return panic_message(&**inner);
+    }
+    panic
+        .downcast_ref::<&str>()
+        .map(|s| (*s).to_string())
+        .or_else(|| panic.downcast_ref::<String>().cloned())
+        .unwrap_or_else(|| "unknown panic".to_string())
+}
diff --git a/native-ffi/src/registry.rs b/native-ffi/src/registry.rs
new file mode 100644
index 0000000..bccfb0c
--- /dev/null
+++ b/native-ffi/src/registry.rs
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Provider builder registry.
+//!
+//! "Approach A" means the providers ship compiled into the final cdylib rather
+//! than being imported over an FFI. A consumer registers each builder by name
+//! at startup; the C ABI selects one by that name and hands it the opaque
+//! `options`/`partition` byte blobs it was given. The builder decodes those
+//! however it likes (protobuf, JSON, bincode) -- the ABI stays oblivious.
+
+use std::collections::HashMap;
+use std::sync::{Arc, RwLock};
+
+use datafusion::catalog::TableProvider;
+use datafusion::prelude::SessionContext;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+
+/// Builds a provider from caller-supplied bytes.
+///
+/// * `ctx`       -- the scan's session context, already configured with the
+///   caller's tuning/overrides. A builder that must infer a schema or read an
+///   object store (e.g. a listing table) uses `ctx.state()` for that; simple
+///   in-memory providers ignore it.
+/// * `options`   -- provider-level config (which table, paths, schema, ...).
+/// * `partition` -- optional per-partition slice descriptor; empty for a
+///   whole-table scan.
+///
+/// `options`/`partition` are opaque to the ABI; their encoding is a contract
+/// between the registrant and whoever fills the bytes on the other side of the
+/// boundary (the in-tree builders use [`crate::proto::ScanConfig`]).
+pub type ProviderBuilder = fn(
+    ctx: &SessionContext,
+    options: &[u8],
+    partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>>;
+
+/// The process-wide name -> builder table, created empty on first access.
+fn registry() -> &'static RwLock<HashMap<String, ProviderBuilder>> {
+    type Table = RwLock<HashMap<String, ProviderBuilder>>;
+    static REGISTRY: std::sync::OnceLock<Table> = std::sync::OnceLock::new();
+    REGISTRY.get_or_init(Table::default)
+}
+
+/// Store `builder` under `name`; a later registration with the same name
+/// replaces the earlier one. Intended to run once per provider at cdylib
+/// startup (e.g. from a `#[ctor]` or an exported init function the consumer
+/// invokes). Panics if the registry lock is poisoned.
+pub fn register_provider(name: impl Into<String>, builder: ProviderBuilder) {
+    let mut table = registry().write().expect("provider registry poisoned");
+    table.insert(name.into(), builder);
+}
+
+/// Resolve `name` in the registry and run its builder on the given bytes.
+/// Returns `DfStatus::UnknownProvider` when nothing is registered under
+/// `name`; otherwise returns whatever the builder returns.
+pub fn build_provider(
+    name: &str,
+    ctx: &SessionContext,
+    options: &[u8],
+    partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>> {
+    // Copy the fn pointer out in a single statement so the read guard is
+    // dropped before the builder runs.
+    let found = registry()
+        .read()
+        .expect("provider registry poisoned")
+        .get(name)
+        .copied();
+
+    let Some(builder) = found else {
+        return Err(ScanError::new(
+            DfStatus::UnknownProvider,
+            format!("no provider builder registered under name {name:?}"),
+        ));
+    };
+    builder(ctx, options, partition)
+}
diff --git a/native-ffi/src/runtime.rs b/native-ffi/src/runtime.rs
new file mode 100644
index 0000000..87fe2e7
--- /dev/null
+++ b/native-ffi/src/runtime.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The cdylib-wide Tokio runtime.
+//!
+//! DataFusion planning and execution are async; this ABI is synchronous, so
+//! every call that awaits does so through this runtime. Statically linked into
+//! whatever cdylib embeds this crate, so it is a per-cdylib singleton -- two
+//! libraries loaded in one process get independent runtimes and cannot collide.
+//!
+//! This mirrors `datafusion-jni-common`'s runtime but is deliberately
+//! duplicated here so the C ABI carries no dependency on the JNI crate.
+
+use std::sync::OnceLock;
+
+use tokio::runtime::{Handle, Runtime};
+
+static RT: OnceLock<Runtime> = OnceLock::new();
+
+/// The shared Tokio runtime, built by `Runtime::new()` on first use; panics if that fails.
+pub fn runtime() -> &'static Runtime {
+    RT.get_or_init(|| Runtime::new().expect("failed to create Tokio runtime"))
+}
+
+/// Handle to [`runtime`], for `block_on` / `enter`.
+pub fn handle() -> &'static Handle {
+    runtime().handle()
+}
diff --git a/native-ffi/src/scan.rs b/native-ffi/src/scan.rs
new file mode 100644
index 0000000..4a668d0
--- /dev/null
+++ b/native-ffi/src/scan.rs
@@ -0,0 +1,183 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Planning and execution core, free of any C/JVM concerns.
+//!
+//! This is the JNI-free port of the logic in PR #103's `spark/bridge/src/scan.rs`:
+//! build the provider, register it on a private `SessionContext` with the
+//! caller-pinned config, apply the pruned projection and proto-encoded pushed
+//! filters, and plan once. The resulting [`ScanHandle`] then yields one
+//! independent stream per plan partition.
+//!
+//! Spark-specific type widening is intentionally **not** here: it is a consumer
+//! concern (apply a `WideningTableProvider` decorator inside the registered
+//! builder if you need it), so this core stays a faithful DataFusion scan.
+
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::dataframe::DataFrame;
+use datafusion::execution::TaskContext;
+use datafusion::physical_plan::{execute_stream, ExecutionPlan};
+use datafusion::prelude::{SessionConfig, SessionContext};
+use datafusion_proto::logical_plan::from_proto::parse_expr;
+use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec;
+use datafusion_proto::protobuf::LogicalExprNode;
+use prost::Message;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+use crate::reader::StreamingReader;
+use crate::registry::build_provider;
+use crate::runtime::handle;
+
+/// Registration name of the provider on the scan's private context. Never
+/// surfaces in SQL (the plan is built through the DataFrame API), so no
+/// quoting/collision concern.
+const SCAN_TABLE_NAME: &str = "df_scan";
+
+/// Inputs to [`create`], decoded from the C arguments by the ABI layer.
+pub struct ScanRequest<'a> {
+    pub provider: &'a str,
+    pub options: &'a [u8],
+    pub partition: &'a [u8],
+    /// `<= 0` leaves the DataFusion default.
+    pub target_partitions: i32,
+    /// `<= 0` leaves the DataFusion default.
+    pub batch_size: i32,
+    pub config_overrides: Vec<(String, String)>,
+    /// Column names to project; empty selects all.
+    pub projection: Vec<String>,
+    /// Each entry is a serialized `datafusion.LogicalExprNode`.
+    pub filters: Vec<Vec<u8>>,
+    /// Optional row limit pushed into the scan. `None` means no limit.
+    pub limit: Option<usize>,
+}
+
+/// A planned scan. Holds the context alive for the plan's lifetime.
+pub struct ScanHandle {
+    _ctx: SessionContext,
+    plan: Arc<dyn ExecutionPlan>,
+    task_ctx: Arc<TaskContext>,
+}
+
+/// Build the provider through the registry and report its output schema,
+/// without planning a scan. Mirrors #103's `provider_schema_ipc`, except the
+/// live `SchemaRef` is returned (the ABI converts it to an Arrow C Schema).
+/// The provider is built against a default `SessionContext`, which is enough
+/// for schema inference against the default (local) object store; a provider
+/// needing custom stores should be built through [`create`].
+pub fn schema(provider: &str, options: &[u8], partition: &[u8]) -> ScanResult<SchemaRef> {
+    let default_ctx = SessionContext::new();
+    build_provider(provider, &default_ctx, options, partition).map(|built| built.schema())
+}
+
+/// Build, register, project, filter, and plan exactly once.
+///
+/// Steps, in order: assemble a `SessionConfig` from the request's tuning
+/// fields and overrides, build the provider against that context, register it
+/// under [`SCAN_TABLE_NAME`], apply the projection, the proto-encoded filters
+/// and the optional limit through the DataFrame API, then create the physical
+/// plan. Any failing step aborts with its error.
+pub fn create(req: ScanRequest<'_>) -> ScanResult<ScanHandle> {
+    // Build the context first: a provider may need it (schema inference, object
+    // store access) at construction time.
+    let mut config = SessionConfig::new();
+    if req.target_partitions > 0 {
+        config = config.with_target_partitions(req.target_partitions as usize);
+    }
+    if req.batch_size > 0 {
+        config = config.with_batch_size(req.batch_size as usize);
+    }
+    for (key, value) in &req.config_overrides {
+        config.options_mut().set(key, value)?;
+    }
+
+    let ctx = SessionContext::new_with_config(config);
+    let provider = build_provider(req.provider, &ctx, req.options, req.partition)?;
+    ctx.register_table(SCAN_TABLE_NAME, provider)?;
+
+    let mut df: DataFrame = handle().block_on(ctx.table(SCAN_TABLE_NAME))?;
+
+    // Snapshot the task context once instead of rebuilding it per filter. It
+    // is derived from the DataFrame's session state, which the projection /
+    // filter / limit steps below carry along unchanged, so one snapshot serves
+    // both as the function registry for filter decoding and as the execution
+    // context stored in the handle.
+    let task_ctx = Arc::new(df.task_ctx());
+
+    if !req.projection.is_empty() {
+        let refs: Vec<&str> = req.projection.iter().map(String::as_str).collect();
+        df = df.select_columns(&refs)?;
+    }
+    for bytes in &req.filters {
+        let node = LogicalExprNode::decode(bytes.as_slice())?;
+        // TaskContext implements FunctionRegistry; the default codec suffices
+        // for the column/literal/builtin expressions a predicate translator
+        // emits.
+        let expr = parse_expr(&node, &*task_ctx, &DefaultLogicalExtensionCodec {})
+            .map_err(|e| ScanError::new(DfStatus::Planning, e.to_string()))?;
+        df = df.filter(expr)?;
+    }
+    if let Some(fetch) = req.limit {
+        df = df.limit(0, Some(fetch))?;
+    }
+
+    let plan = handle().block_on(df.create_physical_plan())?;
+
+    Ok(ScanHandle {
+        _ctx: ctx,
+        plan,
+        task_ctx,
+    })
+}
+
+impl ScanHandle {
+    /// Number of output partitions of the planned physical plan.
+    pub fn partition_count(&self) -> usize {
+        let props = self.plan.properties();
+        props.output_partitioning().partition_count()
+    }
+
+    /// Open an independent reader over a single plan partition. Safe to call
+    /// concurrently for different partitions: `ExecutionPlan`/`TaskContext`
+    /// are `Send + Sync`, and each call only clones their `Arc`s. An index at
+    /// or beyond [`Self::partition_count`] is rejected as an invalid argument.
+    pub fn execute_partition(&self, partition: usize) -> ScanResult<StreamingReader> {
+        let available = self.partition_count();
+        if partition >= available {
+            return Err(ScanError::new(
+                DfStatus::InvalidArgument,
+                format!(
+                    "partition index {partition} out of range: plan has {available} partition(s)"
+                ),
+            ));
+        }
+
+        let schema: SchemaRef = self.plan.schema();
+        // execute() is synchronous, but operators may tokio::spawn while it
+        // runs (RepartitionExec et al.), so a runtime context must be entered.
+        let _rt = handle().enter();
+        self.plan
+            .execute(partition, Arc::clone(&self.task_ctx))
+            .map(|stream| StreamingReader { schema, stream })
+            .map_err(|e| ScanError::new(DfStatus::Execution, e.to_string()))
+    }
+
+    /// Open one reader over the whole plan (all partitions coalesced).
+    pub fn execute_all(&self) -> ScanResult<StreamingReader> {
+        let schema: SchemaRef = self.plan.schema();
+        let _rt = handle().enter();
+        execute_stream(Arc::clone(&self.plan), Arc::clone(&self.task_ctx))
+            .map(|stream| StreamingReader { schema, stream })
+            .map_err(|e| ScanError::new(DfStatus::Execution, e.to_string()))
+    }
+}
diff --git a/native-ffi/tests/listing.rs b/native-ffi/tests/listing.rs
new file mode 100644
index 0000000..eac5e2e
--- /dev/null
+++ b/native-ffi/tests/listing.rs
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! End-to-end test of the `datafusion.listing` provider through the plain-C
+//! ABI: write a CSV, encode a ScanConfig pointing at it, scan it, and import
+//! the result back through the Arrow C Stream interface -- the path a foreign
+//! consumer takes. Exercises schema inference (no explicit schema supplied).
+
+use std::ffi::{c_char, CStr};
+use std::fs;
+use std::process;
+use std::ptr;
+
+use datafusion::arrow::array::Int64Array;
+use datafusion::arrow::ffi::FFI_ArrowSchema;
+use datafusion::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
+
+use datafusion_scan_ffi::abi::{
+    df_error_free, df_scan_close, df_scan_create, df_scan_execute, df_scan_partition_count,
+    df_scan_schema, DfScanHandle,
+};
+use datafusion_scan_ffi::ffi_types::{DfBytes, DfStr};
+use datafusion_scan_ffi::listing;
+use datafusion_scan_ffi::proto::{
+    listing_source, scan_config, CsvReadOptionsProto, ListingSource, ScanConfig,
+};
+use prost::Message;
+
+unsafe fn take_err(err: *mut c_char) -> Option<String> {
+    if err.is_null() {
+        None
+    } else {
+        let s = CStr::from_ptr(err).to_string_lossy().into_owned();
+        df_error_free(err);
+        Some(s)
+    }
+}
+
+/// Write a CSV into a temp dir unique to this call and return (dir, file path).
+///
+/// The dir name combines the process id with a per-call counter: the tests in
+/// this binary share one pid and run on parallel threads, so the pid alone
+/// would have them truncate and rewrite the same file while another test reads it.
+fn write_csv() -> (std::path::PathBuf, String) {
+    static SEQ: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
+    let seq = SEQ.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+    let dir = std::env::temp_dir().join(format!("df-scan-ffi-{}-{seq}", process::id()));
+    fs::create_dir_all(&dir).expect("create temp dir");
+    let path = dir.join("data.csv");
+    fs::write(&path, "id,name\n1,a\n2,b\n3,c\n").expect("write csv");
+    (dir, path.to_string_lossy().into_owned())
+}
+
+/// Encode a ScanConfig for a CSV listing source over `path`.
+fn csv_config(path: &str) -> Vec<u8> {
+    ScanConfig {
+        provider: listing::NAME.to_string(),
+        source: Some(scan_config::Source::Listing(ListingSource {
+            paths: vec![path.to_string()],
+            schema_ipc: None,
+            format: Some(listing_source::Format::Csv(CsvReadOptionsProto {
+                has_header: true,
+                delimiter: b',' as u32,
+                quote: b'"' as u32,
+                file_extension: ".csv".to_string(),
+                ..Default::default()
+            })),
+        })),
+    }
+    .encode_to_vec()
+}
+
+fn provider() -> DfStr {
+    DfStr {
+        ptr: listing::NAME.as_ptr(),
+        len: listing::NAME.len(),
+    }
+}
+
+fn options(bytes: &[u8]) -> DfBytes {
+    DfBytes {
+        ptr: bytes.as_ptr(),
+        len: bytes.len(),
+    }
+}
+
+const EMPTY: DfBytes = DfBytes {
+    ptr: ptr::null(),
+    len: 0,
+};
+
+#[test]
+fn listing_csv_schema_is_inferred() {
+    listing::register();
+    let (_dir, path) = write_csv();
+    let cfg = csv_config(&path);
+
+    let mut schema = FFI_ArrowSchema::empty();
+    let mut err: *mut c_char = ptr::null_mut();
+    let status = unsafe { df_scan_schema(provider(), options(&cfg), EMPTY, &mut schema, &mut err) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+
+    let schema =
+        datafusion::arrow::datatypes::Schema::try_from(&schema).expect("import FFI_ArrowSchema");
+    let names: Vec<_> = schema.fields().iter().map(|f| f.name().as_str()).collect();
+    assert_eq!(names, vec!["id", "name"]);
+}
+
+#[test]
+fn listing_csv_scans_rows() {
+    listing::register();
+    let (_dir, path) = write_csv();
+    let cfg = csv_config(&path);
+
+    // Plan.
+    let mut handle: *mut DfScanHandle = ptr::null_mut();
+    let mut err: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        df_scan_create(
+            provider(),
+            options(&cfg),
+            EMPTY,
+            0,
+            0,
+            -1,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            &mut handle,
+            &mut err,
+        )
+    };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+    assert!(!handle.is_null());
+
+    // Partition count is reported. Capture the status first so a failure
+    // prints (and frees) the ABI error message, matching the other calls in
+    // this test.
+    let mut count = 0i32;
+    let mut err2: *mut c_char = ptr::null_mut();
+    let status = unsafe { df_scan_partition_count(handle, &mut count, &mut err2) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err2) });
+    assert!(count >= 1, "expected at least one partition, got {count}");
+
+    // Execute the whole plan as one coalesced stream and sum `id`.
+    let mut stream = FFI_ArrowArrayStream::empty();
+    let mut err3: *mut c_char = ptr::null_mut();
+    let status = unsafe { df_scan_execute(handle, &mut stream, &mut err3) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err3) });
+
+    let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.expect("import stream");
+    let mut total: i64 = 0;
+    let mut rows = 0usize;
+    for batch in reader {
+        let batch = batch.expect("batch");
+        rows += batch.num_rows();
+        let ids = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .expect("id is Int64");
+        total += ids.values().iter().sum::<i64>();
+    }
+    assert_eq!(rows, 3);
+    assert_eq!(total, 1 + 2 + 3);
+
+    unsafe { df_scan_close(handle) };
+}
diff --git a/native-ffi/tests/proto.rs b/native-ffi/tests/proto.rs
new file mode 100644
index 0000000..ac668c7
--- /dev/null
+++ b/native-ffi/tests/proto.rs
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Confirms the generated scan-config / scan-request types encode and decode,
+//! including a per-format read-option message embedded through the source
+//! oneof -- i.e. the imports across `proto/*.proto` resolved at build time.
+
+use datafusion_scan_ffi::proto::{
+    listing_source, scan_config, CsvReadOptionsProto, ListingSource, ScanConfig, ScanRequest,
+};
+use prost::Message;
+
+#[test]
+fn scan_config_with_listing_source_roundtrips() {
+    // The per-format options ride inside the listing source's `format` oneof.
+    let csv_options = CsvReadOptionsProto {
+        has_header: true,
+        delimiter: b',' as u32,
+        quote: b'"' as u32,
+        file_extension: ".csv".to_string(),
+        ..Default::default()
+    };
+    let source = ListingSource {
+        paths: vec!["s3://bucket/data/".to_string()],
+        schema_ipc: None,
+        format: Some(listing_source::Format::Csv(csv_options)),
+    };
+    let config = ScanConfig {
+        provider: "datafusion.listing".to_string(),
+        source: Some(scan_config::Source::Listing(source)),
+    };
+
+    let wire = config.encode_to_vec();
+    let decoded = ScanConfig::decode(wire.as_slice()).expect("decode ScanConfig");
+    assert_eq!(decoded.provider, "datafusion.listing");
+    let listing = match decoded.source {
+        Some(scan_config::Source::Listing(l)) => l,
+        other => panic!("expected listing source, got {other:?}"),
+    };
+    assert_eq!(listing.paths, vec!["s3://bucket/data/".to_string()]);
+    let csv = match listing.format {
+        Some(listing_source::Format::Csv(c)) => c,
+        other => panic!("expected CSV format, got {other:?}"),
+    };
+    assert!(csv.has_header);
+    assert_eq!(csv.delimiter, b',' as u32);
+}
+
+#[test]
+fn scan_request_roundtrips() {
+    // A single `config_overrides` entry; the key is kept in a local so the
+    // lookup after decoding uses the exact same string.
+    let pushdown_key = "datafusion.execution.parquet.pushdown_filters";
+    let overrides = std::iter::once((pushdown_key.to_string(), "true".to_string()));
+
+    // Covers repeated, optional, scalar and map-like fields.
+    let req = ScanRequest {
+        projection: vec!["id".to_string(), "name".to_string()],
+        filters: vec![vec![1, 2, 3], vec![4, 5]],
+        limit: Some(100),
+        target_partitions: 8,
+        batch_size: 0,
+        config_overrides: overrides.collect(),
+    };
+
+    // Round-trip through the encoded bytes.
+    let wire = req.encode_to_vec();
+    let decoded = ScanRequest::decode(wire.as_slice()).expect("decode ScanRequest");
+
+    // Spot-check the decoded fields.
+    assert_eq!(decoded.projection, vec!["id", "name"]);
+    assert_eq!(decoded.filters.len(), 2);
+    assert_eq!(decoded.limit, Some(100));
+    assert_eq!(decoded.target_partitions, 8);
+
+    // The override entry must come back under the same key.
+    let pushdown = decoded.config_overrides.get(pushdown_key);
+    assert_eq!(pushdown.map(String::as_str), Some("true"));
+}
diff --git a/native-ffi/tests/roundtrip.rs b/native-ffi/tests/roundtrip.rs
new file mode 100644
index 0000000..3ec6436
--- /dev/null
+++ b/native-ffi/tests/roundtrip.rs
@@ -0,0 +1,213 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Exercises the plain-C ABI exactly as a foreign consumer would: call the
+//! `df_scan_*` entry points with C structs, hand a caller-allocated
+//! `FFI_ArrowArrayStream` across the boundary, then import it back through the
+//! Arrow C Stream interface (`ArrowArrayStreamReader`) -- the Rust analogue of
+//! arrow-java's `Data.importArrayStream`. No JVM involved.
+
+use std::ffi::{c_char, CStr};
+use std::ptr;
+
+use datafusion::arrow::array::Int64Array;
+use datafusion::arrow::ffi::FFI_ArrowSchema;
+use datafusion::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
+
+use datafusion_scan_ffi::abi::{
+    df_error_free, df_scan_abi_version, df_scan_close, df_scan_create, df_scan_execute,
+    df_scan_execute_partition, df_scan_partition_count, df_scan_schema, DfScanHandle,
+};
+use datafusion_scan_ffi::ffi_types::{DfBytes, DfStr};
+use datafusion_scan_ffi::{demo, ABI_VERSION};
+
+/// Borrowed `DfStr` view (pointer + length) of the demo provider's name.
+fn provider() -> DfStr {
+    let name = demo::NAME;
+    let (ptr, len) = (name.as_ptr(), name.len());
+    DfStr { ptr, len }
+}
+
+const EMPTY_BYTES: DfBytes = DfBytes {
+    len: 0,
+    ptr: ptr::null(),
+};
+
+/// Pull an err string (if any) for assertions, freeing it.
+unsafe fn take_err(err: *mut c_char) -> Option<String> {
+    if err.is_null() {
+        None
+    } else {
+        let s = CStr::from_ptr(err).to_string_lossy().into_owned();
+        df_error_free(err);
+        Some(s)
+    }
+}
+
+#[test]
+fn abi_version_matches() {
+    assert_eq!(ABI_VERSION, df_scan_abi_version());
+}
+
+#[test]
+fn schema_probe_returns_provider_schema() {
+    use datafusion::arrow::datatypes::Schema;
+    demo::register();
+    let mut raw = FFI_ArrowSchema::empty();
+    let mut err: *mut c_char = ptr::null_mut();
+    let status =
+        unsafe { df_scan_schema(provider(), EMPTY_BYTES, EMPTY_BYTES, &mut raw, &mut err) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+
+    let schema = Schema::try_from(&raw).expect("import FFI_ArrowSchema");
+    let names: Vec<&str> = schema.fields().iter().map(|f| f.name().as_str()).collect();
+    assert_eq!(names, ["id", "name"]);
+}
+
+#[test]
+fn unknown_provider_reports_status_and_message() {
+    let mut out = FFI_ArrowSchema::empty();
+    let mut err: *mut c_char = ptr::null_mut();
+    // Deliberately unknown provider name; the length is derived, not hard-coded.
+    let name = "nope";
+    let (data, len) = (name.as_ptr(), name.len());
+    let bad = DfStr { ptr: data, len };
+    let status = unsafe { df_scan_schema(bad, EMPTY_BYTES, EMPTY_BYTES, &mut out, &mut err) };
+    assert_eq!(status, 2 /* DF_UNKNOWN_PROVIDER */);
+    let msg = unsafe { take_err(err) }.expect("error message");
+    assert!(msg.contains("nope"), "msg was: {msg}");
+}
+
+#[test]
+fn create_reports_two_partitions() {
+    demo::register();
+    let scan = create_full_scan();
+
+    let (mut partitions, mut err) = (0i32, ptr::null_mut::<c_char>());
+    let status = unsafe { df_scan_partition_count(scan, &mut partitions, &mut err) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+    assert_eq!(partitions, 2, "demo provider has two partitions");
+    unsafe { df_scan_close(scan) };
+}
+
+#[test]
+fn execute_partition_roundtrips_arrow_c_stream() {
+    demo::register();
+    let scan = create_full_scan();
+
+    // Drain both partitions one at a time. Each stream is imported back
+    // through the Arrow C Stream interface, the way a foreign consumer would,
+    // while the row count and the sum of `id` accumulate across every batch
+    // of every partition.
+    let (mut rows, mut id_sum) = (0usize, 0i64);
+    for partition in 0..2 {
+        // Caller-allocated, empty stream struct for the callee to fill in.
+        let mut stream = FFI_ArrowArrayStream::empty();
+        let mut err: *mut c_char = ptr::null_mut();
+        let status = unsafe { df_scan_execute_partition(scan, partition, &mut stream, &mut err) };
+        assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+
+        // Import the filled-in stream and walk its batches.
+        let batches = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }
+            .expect("import FFI_ArrowArrayStream");
+        for batch in batches.map(|b| b.expect("batch")) {
+            let id_col = batch.column(0).as_any().downcast_ref::<Int64Array>();
+            let ids = id_col.expect("id column is Int64");
+            rows += batch.num_rows();
+            id_sum += ids.values().iter().sum::<i64>();
+        }
+    }
+
+    assert_eq!(rows, 5, "3 + 2 rows across the two partitions");
+    assert_eq!(id_sum, 1 + 2 + 3 + 4 + 5);
+    // Release the planned scan.
+    unsafe { df_scan_close(scan) };
+}
+
+#[test]
+fn limit_caps_row_count() {
+    demo::register();
+    // Plan over the demo provider (5 rows across two partitions) with the
+    // limit argument set to 2.
+    let mut scan: *mut DfScanHandle = ptr::null_mut();
+    let mut create_err: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        df_scan_create(
+            provider(),
+            EMPTY_BYTES,
+            EMPTY_BYTES,
+            0,
+            0,
+            2, // limit
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            &mut scan,
+            &mut create_err,
+        )
+    };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(create_err) });
+
+    // Execute the whole plan as one stream: the cap applies across partitions.
+    let mut stream = FFI_ArrowArrayStream::empty();
+    let mut exec_err: *mut c_char = ptr::null_mut();
+    let status = unsafe { df_scan_execute(scan, &mut stream, &mut exec_err) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(exec_err) });
+    let batches = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.expect("import");
+    let mut rows = 0usize;
+    for batch in batches {
+        rows += batch.expect("batch").num_rows();
+    }
+    assert_eq!(rows, 2, "limit should cap the scan at 2 rows");
+
+    unsafe { df_scan_close(scan) };
+}
+
+#[test]
+fn close_is_null_safe() {
+    unsafe { df_scan_close(ptr::null_mut::<DfScanHandle>()) };
+}
+
+/// Plan a full scan (no projection / filters) over the demo provider; panics on failure.
+fn create_full_scan() -> *mut DfScanHandle {
+    let mut handle: *mut DfScanHandle = ptr::null_mut();
+    let mut err: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        df_scan_create(
+            provider(),
+            EMPTY_BYTES,
+            EMPTY_BYTES,
+            0,
+            0,
+            -1, // limit slot (cf. `2, // limit` elsewhere); -1 presumably = none -- confirm
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            &mut handle, // out: receives the planned scan handle
+            &mut err, // out: receives an error string, consumed by `take_err`
+        )
+    };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+    assert!(!handle.is_null());
+    handle // callers in this file release it with `df_scan_close`
+}
diff --git a/native-jni/Cargo.toml b/native-jni/Cargo.toml
new file mode 100644
index 0000000..1001bf6
--- /dev/null
+++ b/native-jni/Cargo.toml
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-scan-jni"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+publish = false
+repository.workspace = true
+
+[lib]
+# The JVM-loaded shim. Thin: marshals Java args into the in-process scan core
+# of `datafusion-scan-ffi` and writes Arrow C Stream / C Schema structs into
+# arrow-java's addresses. Arrow data crosses via the C Data interface, not JNI.
+crate-type = ["cdylib"]
+
+[dependencies]
+# Arrow C interface types written into Java-allocated structs.
+arrow = { workspace = true }
+# The plain-C scan crate, used in-process. `demo-providers` registers the
+# in-memory provider alongside `datafusion.listing` for testing.
+datafusion-scan-ffi = { path = "../native-ffi", features = ["demo-providers"] }
+# JNI bindings for the `Java_*` entry points.
+jni = { workspace = true }
+# Decodes the engine's ScanRequest blob.
+prost = { workspace = true }
diff --git a/native-jni/src/lib.rs b/native-jni/src/lib.rs
new file mode 100644
index 0000000..03dd9e8
--- /dev/null
+++ b/native-jni/src/lib.rs
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Thin JNI shim over the plain-C scan core (`datafusion-scan-ffi`).
+//!
+//! This is the JVM's path to the scan ABI. It is deliberately minimal: it
+//! marshals Java arguments (a `String` provider name and two `byte[]` blobs)
+//! into the in-process scan core, hands back an opaque handle as a `jlong`,
+//! and -- for the data plane -- writes a standard `FFI_ArrowArrayStream` (or
+//! `FFI_ArrowSchema`) into the address arrow-java allocated. **No Arrow data
+//! crosses the JNI boundary**: batches flow through the Arrow C Stream
+//! interface, which arrow-java imports with `Data.importArrayStream`.
+//!
+//! Everything here mirrors `core`'s existing `DataFrame` collect path; the only
+//! new ABI is the handful of `Java_org_apache_datafusion_scan_NativeScan_*`
+//! entry points below. Non-Java consumers use the `df_scan_*` C symbols
+//! exported by `datafusion-scan-ffi` instead; this crate is purely the JVM
+//! adapter.
+
+use std::sync::OnceLock;
+
+use arrow::ffi::FFI_ArrowSchema;
+use arrow::ffi_stream::FFI_ArrowArrayStream;
+use datafusion_scan_ffi::proto::ScanRequest as ProtoScanRequest;
+use datafusion_scan_ffi::scan::{self, ScanHandle, ScanRequest};
+use datafusion_scan_ffi::{demo, listing};
+use jni::objects::{JByteArray, JClass, JString};
+use jni::sys::{jint, jlong};
+use jni::JNIEnv;
+use prost::Message;
+
+/// Registers the in-tree providers once; non-Java embedders register their own.
+fn ensure_registered() {
+    fn register_all() {
+        listing::register();
+        demo::register();
+    }
+    static REGISTERED: OnceLock<()> = OnceLock::new();
+    REGISTERED.get_or_init(register_all);
+}
+
+/// Run `body`; on `Err` -- or a panic, which must not unwind into the JVM --
+/// throw a Java `RuntimeException` and return `default`.
+fn try_or_throw<T>(
+    env: &mut JNIEnv,
+    default: T,
+    body: impl FnOnce(&mut JNIEnv) -> Result<T, String>,
+) -> T {
+    let guarded = std::panic::AssertUnwindSafe(|| body(&mut *env));
+    let message = match std::panic::catch_unwind(guarded) {
+        Ok(Ok(value)) => return value,
+        Ok(Err(message)) => message,
+        Err(_) => "rust panic in native scan call".to_string(),
+    };
+    // If throwing fails there is nothing more we can do; still return `default`.
+    let _ = env.throw_new("java/lang/RuntimeException", message);
+    default
+}
+
+/// Copy a Java `byte[]` into a `Vec<u8>`; a null array reads as empty.
+fn read_bytes(env: &mut JNIEnv, arr: &JByteArray) -> Result<Vec<u8>, String> {
+    if arr.is_null() {
+        return Ok(Vec::new());
+    }
+    env.convert_byte_array(arr).map_err(|e| e.to_string())
+}
+
+fn read_string(env: &mut JNIEnv, s: &JString) -> Result<String, String> {
+    Ok(env.get_string(s).map_err(|e| e.to_string())?.into())
+}
+
+/// Builds the scan core's `ScanRequest` from the engine's encoded request. The
+/// provider name and config bytes are borrowed; an empty blob means defaults.
+fn build_request<'a>(
+    provider: &'a str,
+    config: &'a [u8],
+    scan_request: &[u8],
+) -> Result<ScanRequest<'a>, String> {
+    let proto = match scan_request {
+        [] => ProtoScanRequest::default(),
+        blob => ProtoScanRequest::decode(blob)
+            .map_err(|e| format!("failed to decode ScanRequest: {e}"))?,
+    };
+    // NOTE(review): `as usize` is lossy if `limit` is signed or wider -- confirm.
+    Ok(ScanRequest {
+        provider,
+        options: config,
+        partition: &[],
+        target_partitions: proto.target_partitions,
+        batch_size: proto.batch_size,
+        limit: proto.limit.map(|rows| rows as usize),
+        config_overrides: proto.config_overrides.into_iter().collect(),
+        projection: proto.projection,
+        filters: proto.filters,
+    })
+}
+
+/// Writes the output schema of `provider` (configured by `config`) as an
+/// `FFI_ArrowSchema` into the arrow-java `ArrowSchema` at `schema_addr`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_providerSchema<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    provider: JString<'local>,
+    config: JByteArray<'local>,
+    schema_addr: jlong,
+) {
+    ensure_registered();
+    try_or_throw(&mut env, (), |env| {
+        if schema_addr == 0 {
+            return Err("schema address is null".to_string());
+        }
+        let name = read_string(env, &provider)?;
+        let options = read_bytes(env, &config)?;
+        let schema = scan::schema(&name, &options, &[]).map_err(|e| e.message)?;
+        let exported = FFI_ArrowSchema::try_from(schema.as_ref()).map_err(|e| e.to_string())?;
+        // SAFETY: arrow-java allocated an empty ArrowSchema at this address.
+        unsafe { (schema_addr as *mut FFI_ArrowSchema).write(exported) };
+        Ok(())
+    })
+}
+
+/// Plans a scan and returns it as an opaque `jlong`: the raw pointer of a boxed
+/// [`ScanHandle`]. Returns 0 after throwing on error. Release with `closeScan`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_createScan<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    provider: JString<'local>,
+    config: JByteArray<'local>,
+    scan_request: JByteArray<'local>,
+) -> jlong {
+    ensure_registered();
+    try_or_throw(&mut env, 0, |env| {
+        let name = read_string(env, &provider)?;
+        let options = read_bytes(env, &config)?;
+        let blob = read_bytes(env, &scan_request)?;
+        let request = build_request(&name, &options, &blob)?;
+        let planned = Box::new(scan::create(request).map_err(|e| e.message)?);
+        Ok(Box::into_raw(planned) as jlong)
+    })
+}
+
+/// Output partition count of a planned scan; throws on a null handle or `jint` overflow.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_partitionCount<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+) -> jint {
+    try_or_throw(&mut env, 0, |_env| {
+        let count = handle_ref(handle)?.partition_count();
+        jint::try_from(count).map_err(|e| format!("partition count exceeds jint: {e}"))
+    })
+}
+
+/// Executes a single partition of the planned scan and writes the resulting
+/// `FFI_ArrowArrayStream` into the arrow-java `ArrowArrayStream` at `stream_addr`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_executeStreamPartition<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+    partition: jint,
+    stream_addr: jlong,
+) {
+    try_or_throw(&mut env, (), |_env| {
+        // Validate the index before touching the handle.
+        let index = usize::try_from(partition)
+            .map_err(|_| "partition index is negative".to_string())?;
+        let scan = handle_ref(handle)?;
+        let reader = scan.execute_partition(index).map_err(|e| e.message)?;
+        // Export the reader through the Arrow C Stream interface.
+        let exported = FFI_ArrowArrayStream::new(Box::new(reader));
+        write_stream(stream_addr, exported)
+    })
+}
+
+/// Runs the entire plan and exports it as one stream at `stream_addr`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_executeStream<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+    stream_addr: jlong,
+) {
+    try_or_throw(&mut env, (), |_env| {
+        let reader = handle_ref(handle)?.execute_all().map_err(|e| e.message)?;
+        let exported = FFI_ArrowArrayStream::new(Box::new(reader));
+        write_stream(stream_addr, exported)
+    })
+}
+
+/// Frees the [`ScanHandle`] behind `handle`; 0 is ignored. Must not race an
+/// in-flight execute on the same handle (the Java wrapper enforces this).
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_closeScan<'local>(
+    _env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+) {
+    if handle != 0 {
+        // SAFETY: handle came from createScan and is not used afterwards.
+        unsafe { drop(Box::from_raw(handle as *mut ScanHandle)) };
+    }
+}
+
+/// Borrow a [`ScanHandle`] from a `jlong`, erroring on null.
+fn handle_ref<'a>(handle: jlong) -> Result<&'a ScanHandle, String> {
+    if handle == 0 {
+        return Err("scan handle is null".to_string());
+    }
+    // SAFETY: handle came from createScan and outlives this borrow.
+    Ok(unsafe { &*(handle as *const ScanHandle) })
+}
+
+fn write_stream(stream_addr: jlong, ffi: FFI_ArrowArrayStream) -> Result<(), String> {
+    if stream_addr != 0 {
+        // SAFETY: arrow-java allocated an empty ArrowArrayStream at this address.
+        unsafe { (stream_addr as *mut FFI_ArrowArrayStream).write(ffi) };
+        return Ok(());
+    }
+    Err("stream address is null".to_string())
+}
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 0362ae6..c040448 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -17,14 +17,17 @@
 
 [package]
 name = "datafusion-jni"
-version = "0.1.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# cdylib JNI artifact loaded by the JVM, not a crates.io library.
 publish = false
 
 [lib]
 # `rlib` alongside `cdylib` so `cargo test` has a Rust-level harness for
-# native-only invariants (e.g. error-classification routing through wrapped
-# DataFusionError chains). The `cdylib` is still the artifact the JVM loads.
+# native-only invariants (the error-classification tests now live in
+# `datafusion-jni-common`). The `cdylib` is still the artifact the JVM loads.
 crate-type = ["cdylib", "rlib"]
 
 [features]
@@ -75,28 +78,27 @@ runtime-metrics = ["dep:tokio-metrics"]
 spark = ["dep:datafusion-spark"]
 
 [dependencies]
-arrow = { version = "58", features = ["ffi"] }
-async-trait = "0.1"
-datafusion = { version = "53.1.0", features = ["avro"] }
-datafusion-proto = "53.1.0"
+arrow = { workspace = true }
+async-trait = { workspace = true }
+datafusion = { workspace = true, features = ["avro"] }
+# Shared JNI plumbing (error->exception mapping, runtime singleton,
+# StreamingReader). `avro` keeps the classifier's AvroError->IoException arm
+# in sync with the `avro` feature on `datafusion` above.
+datafusion-jni-common = { path = "../native-common", features = ["avro"] }
+datafusion-proto = { workspace = true }
 # Apache Spark-compatible functions + expression planners. Optional and
 # gated behind the `spark` feature (in the default set). The `core` feature
 # of the crate is what exposes `SessionStateBuilderSpark`.
-datafusion-spark = { version = "53.1.0", features = ["core"], optional = true }
-datafusion-substrait = { version = "53.1.0", optional = true }
-futures = "0.3"
-jni = "0.21"
-# Pin to the same major as DataFusion 53.1 pulls in transitively (0.13.x)
-# so we share the same `dyn ObjectStore` vtable and don't double-link.
-object_store = { version = "0.13", default-features = false }
-prost = "0.14"
-tokio = { version = "1", features = ["rt-multi-thread"] }
-# Tokio runtime metrics. Optional + cfg-gated: this crate's API surface lives
-# behind `--cfg tokio_unstable`, so enabling the `runtime-metrics` feature also
-# requires the caller to set `RUSTFLAGS="--cfg tokio_unstable"` at build time.
-tokio-metrics = { version = "0.5", optional = true }
-url = "2"
+datafusion-spark = { workspace = true, features = ["core"], optional = true }
+datafusion-substrait = { workspace = true, optional = true }
+futures = { workspace = true }
+jni = { workspace = true }
+object_store = { workspace = true }
+prost = { workspace = true }
+tokio = { workspace = true }
+tokio-metrics = { workspace = true, optional = true }
+url = { workspace = true }
 
 [build-dependencies]
-prost-build = "0.14"
-protoc-bin-vendored = "3"
+prost-build = { workspace = true }
+protoc-bin-vendored = { workspace = true }
diff --git a/native/src/arrow.rs b/native/src/arrow.rs
index 2bbe7b0..67e5caf 100644
--- a/native/src/arrow.rs
+++ b/native/src/arrow.rs
@@ -23,10 +23,10 @@ use jni::sys::jlong;
 use jni::JNIEnv;
 use prost::Message;
 
-use crate::errors::{try_unwrap_or_throw, JniResult};
 use crate::proto_gen::ArrowReadOptionsProto;
 use crate::runtime;
 use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
 
 fn with_arrow_options<R>(
     env: &mut JNIEnv,
diff --git a/native/src/avro.rs b/native/src/avro.rs
index 85d4a07..257ae32 100644
--- a/native/src/avro.rs
+++ b/native/src/avro.rs
@@ -23,10 +23,10 @@ use jni::sys::jlong;
 use jni::JNIEnv;
 use prost::Message;
 
-use crate::errors::{try_unwrap_or_throw, JniResult};
 use crate::proto_gen::AvroReadOptionsProto;
 use crate::runtime;
 use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
 
 fn with_avro_options<R>(
     env: &mut JNIEnv,
diff --git a/native/src/cache_manager.rs b/native/src/cache_manager.rs
index 3b9e286..ec38dc8 100644
--- a/native/src/cache_manager.rs
+++ b/native/src/cache_manager.rs
@@ -34,8 +34,8 @@ use datafusion::execution::cache::cache_unit::{
 };
 use datafusion::execution::cache::DefaultListFilesCache;
 
-use crate::errors::JniResult;
 use crate::proto_gen::CacheManagerOptionsProto;
+use datafusion_jni_common::errors::JniResult;
 
 /// Build a [`CacheManagerConfig`] from the proto. Returns `Ok(None)` if the
 /// caller did not set any cache-manager field, so the JNI layer can skip the
diff --git a/native/src/csv.rs b/native/src/csv.rs
index 3ae4627..b79ed59 100644
--- a/native/src/csv.rs
+++ b/native/src/csv.rs
@@ -26,12 +26,12 @@ use jni::sys::jlong;
 use jni::JNIEnv;
 use prost::Message;
 
-use crate::errors::{try_unwrap_or_throw, JniResult};
 use crate::proto_gen::{
     CsvReadOptionsProto, CsvWriteOptionsProto, FileCompressionType as ProtoFileCompressionType,
 };
 use crate::runtime;
 use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
 
 fn with_csv_options<R>(
     env: &mut JNIEnv,
diff --git a/native/src/json.rs b/native/src/json.rs
index 8eea32f..b87be78 100644
--- a/native/src/json.rs
+++ b/native/src/json.rs
@@ -27,12 +27,12 @@ use jni::sys::jlong;
 use jni::JNIEnv;
 use prost::Message;
 
-use crate::errors::{try_unwrap_or_throw, JniResult};
 use crate::proto_gen::{
     FileCompressionType as ProtoFileCompressionType, JsonWriteOptionsProto, NdJsonReadOptionsProto,
 };
 use crate::runtime;
 use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
 
 fn with_json_options<R>(
     env: &mut JNIEnv,
diff --git a/native/src/lib.rs b/native/src/lib.rs
index 43161d2..56bef5d 100644
--- a/native/src/lib.rs
+++ b/native/src/lib.rs
@@ -19,7 +19,6 @@ mod arrow;
 mod avro;
 mod cache_manager;
 mod csv;
-mod errors;
 mod jni_util;
 mod json;
 mod memory;
@@ -34,16 +33,13 @@ pub(crate) mod proto_gen {
     include!(concat!(env!("OUT_DIR"), "/datafusion_java.rs"));
 }
 
-use std::panic::{catch_unwind, AssertUnwindSafe};
 use std::path::PathBuf;
 use std::sync::{Arc, OnceLock};
 
-use datafusion::arrow::array::RecordBatch;
 use datafusion::arrow::datatypes::SchemaRef;
-use datafusion::arrow::error::ArrowError;
 use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream;
 use datafusion::arrow::ipc::writer::StreamWriter;
-use datafusion::arrow::record_batch::{RecordBatchIterator, RecordBatchReader};
+use datafusion::arrow::record_batch::RecordBatchIterator;
 use datafusion::common::{JoinType, UnnestOptions};
 use datafusion::config::TableParquetOptions;
 use datafusion::dataframe::DataFrame;
@@ -51,11 +47,9 @@ use datafusion::dataframe::DataFrameWriteOptions;
 use datafusion::error::DataFusionError;
 use datafusion::execution::disk_manager::{DiskManagerBuilder, DiskManagerMode};
 use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};
-use datafusion::execution::SendableRecordBatchStream;
 use datafusion::logical_expr::Expr;
 use datafusion::logical_expr::{col, Partitioning, ScalarUDF, Signature, SortExpr};
 use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
-use futures::StreamExt;
 use jni::objects::{JBooleanArray, JByteArray, JClass, JObject, JObjectArray, JString};
 use jni::sys::{jboolean, jbyte, jbyteArray, jint, jlong};
 use jni::JNIEnv;
@@ -63,7 +57,10 @@ use jni::JavaVM;
 use prost::Message;
 use tokio::runtime::Runtime;
 
-use crate::errors::{try_unwrap_or_throw, JniResult};
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
+// Re-exported so sibling modules keep their crate-local `crate::StreamingReader` path.
+pub(crate) use datafusion_jni_common::StreamingReader;
+
 use crate::proto_gen::ParquetReadOptionsProto;
 use crate::proto_gen::SessionOptions;
 use crate::schema::decode_optional_schema;
@@ -84,18 +81,15 @@ pub(crate) fn jvm() -> &'static JavaVM {
 }
 
 pub(crate) fn runtime() -> &'static Runtime {
-    static RT: OnceLock<Runtime> = OnceLock::new();
-    RT.get_or_init(|| {
-        let rt = Runtime::new().expect("failed to create Tokio runtime");
-        // Eagerly install the runtime-metrics accumulator (no-op when the
-        // `runtime-metrics` Cargo feature is off). Initialising here -- not
-        // lazily on the first `runtimeStats()` call -- means the
-        // RuntimeMonitor's sampling baseline coincides with runtime start, so
-        // poll/park/busy totals reflect activity from the first query onward
-        // rather than from the first observation.
-        crate::runtime_metrics::init(rt.handle());
-        rt
-    })
+    // The singleton itself lives in datafusion-jni-common (shared with the
+    // datafusion-spark-bridge SDK; each cdylib statically links its own
+    // copy, so the runtime stays per-library). The init hook eagerly installs the
+    // runtime-metrics accumulator (no-op when the `runtime-metrics` Cargo
+    // feature is off). Initialising here -- not lazily on the first
+    // `runtimeStats()` call -- means the RuntimeMonitor's sampling baseline
+    // coincides with runtime start, so poll/park/busy totals reflect activity
+    // from the first query onward rather than from the first observation.
+    datafusion_jni_common::runtime_with_init(crate::runtime_metrics::init)
 }
 
 /// Wrap the (already-built) `RuntimeEnvBuilder`'s memory pool with a
@@ -324,50 +318,6 @@ pub extern "system" fn Java_org_apache_datafusion_DataFrame_collectDataFrame<'lo
     })
 }
 
-/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous
-/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore
-/// the Java `ArrowReader`) consumes. Each call to `next()` drives one
-/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the
-/// executor pipeline plus a single in-flight batch.
-struct StreamingReader {
-    schema: SchemaRef,
-    stream: SendableRecordBatchStream,
-}
-
-impl Iterator for StreamingReader {
-    type Item = Result<RecordBatch, ArrowError>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        // Arrow's C ABI invokes this iterator through FFI_ArrowArrayStream's
-        // vtable, outside the JNI handler's try_unwrap_or_throw guard. A panic
-        // here (buggy UDF, arrow cast that panics, runtime poison) would
-        // unwind across C/FFI -- undefined behaviour. Catch it and surface as
-        // an ArrowError so the Java side sees a normal exception instead.
-        let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
-        match next {
-            Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
-            Err(panic) => {
-                let msg = if let Some(s) = panic.downcast_ref::<String>() {
-                    s.clone()
-                } else if let Some(s) = panic.downcast_ref::<&str>() {
-                    (*s).to_string()
-                } else {
-                    "rust panic with non-string payload".to_string()
-                };
-                Some(Err(ArrowError::ExternalError(
-                    format!("panic in DataFrame stream: {msg}").into(),
-                )))
-            }
-        }
-    }
-}
-
-impl RecordBatchReader for StreamingReader {
-    fn schema(&self) -> SchemaRef {
-        self.schema.clone()
-    }
-}
-
 #[no_mangle]
 pub extern "system" fn Java_org_apache_datafusion_DataFrame_executeStreamDataFrame<'local>(
     mut env: JNIEnv<'local>,
diff --git a/native/src/object_store.rs b/native/src/object_store.rs
index eefccf2..985d721 100644
--- a/native/src/object_store.rs
+++ b/native/src/object_store.rs
@@ -28,9 +28,9 @@ use std::sync::Arc;
 use datafusion::prelude::SessionContext;
 use url::Url;
 
-use crate::errors::JniResult;
 use crate::proto_gen::object_store_registration::Backend;
 use crate::proto_gen::ObjectStoreRegistration;
+use datafusion_jni_common::errors::JniResult;
 
 #[cfg(feature = "object-store-gcp")]
 use crate::proto_gen::GcsOptions;
diff --git a/native/src/proto.rs b/native/src/proto.rs
index 4f187bc..c1315f9 100644
--- a/native/src/proto.rs
+++ b/native/src/proto.rs
@@ -28,8 +28,8 @@ use jni::sys::{jbyteArray, jlong};
 use jni::JNIEnv;
 use prost::Message;
 
-use crate::errors::{try_unwrap_or_throw, JniResult};
 use crate::runtime;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
 
 #[no_mangle]
 pub extern "system" fn Java_org_apache_datafusion_SessionContext_createDataFrameFromProto<
diff --git a/native/src/runtime_metrics.rs b/native/src/runtime_metrics.rs
index e69410e..dd60dcb 100644
--- a/native/src/runtime_metrics.rs
+++ b/native/src/runtime_metrics.rs
@@ -38,7 +38,7 @@
 //!   10 totalOverflowCount
 
 #[cfg(not(feature = "runtime-metrics"))]
-use crate::errors::JniResult;
+use datafusion_jni_common::errors::JniResult;
 
 /// Number of i64 values in the snapshot array; kept here so the Java side and
 /// the feature-off stub agree on the layout.
@@ -51,7 +51,7 @@ mod imp {
     use tokio_metrics::{RuntimeIntervals, RuntimeMonitor};
 
     use super::STATS_FIELD_COUNT;
-    use crate::errors::JniResult;
+    use datafusion_jni_common::errors::JniResult;
 
     /// `RuntimeMonitor::intervals().next()` returns *delta* metrics covering
     /// the period since the previous call (or, on the very first call, since
@@ -196,7 +196,7 @@ pub fn runtime_stats() -> JniResult<[i64; STATS_FIELD_COUNT]> {
     Err(
         "datafusion-jni was built without the `runtime-metrics` Cargo feature; \
          rebuild the native crate with \
-         `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build --features runtime-metrics` \
+         `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build -p datafusion-jni --features runtime-metrics` \
          to enable SessionContext.runtimeStats"
             .into(),
     )
diff --git a/native/src/schema.rs b/native/src/schema.rs
index 968a73a..0c3c7ab 100644
--- a/native/src/schema.rs
+++ b/native/src/schema.rs
@@ -20,7 +20,7 @@ use datafusion::arrow::ipc::reader::StreamReader;
 use jni::objects::JByteArray;
 use jni::JNIEnv;
 
-use crate::errors::JniResult;
+use datafusion_jni_common::errors::JniResult;
 
 /// Decode an optional Arrow-IPC schema byte array passed in from Java.
 /// Returns `None` if the byte-array reference is null.
diff --git a/pom.xml b/pom.xml
index 6210841..a48be6c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -33,6 +33,7 @@ under the License.
     <modules>
         <module>core</module>
         <module>examples</module>
+        <module>spark</module>
     </modules>
 
     <properties>
@@ -95,6 +96,11 @@ under the License.
     <build>
         <pluginManagement>
             <plugins>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-compiler-plugin</artifactId>
+                    <version>3.13.0</version>
+                </plugin>
                 <plugin>
                     <groupId>org.apache.maven.plugins</groupId>
                     <artifactId>maven-surefire-plugin</artifactId>
@@ -173,10 +179,10 @@ under the License.
                         <exclude>.mvn/**</exclude>
                         <!-- Build outputs and generated test data -->
                         <exclude>**/target/**</exclude>
-                        <exclude>native/target/**</exclude>
+                        <exclude>rust-target/**</exclude>
                         <exclude>tpch-data/**</exclude>
-                        <!-- Rust lockfile -->
-                        <exclude>native/Cargo.lock</exclude>
+                        <!-- Rust lockfile (single workspace lock) -->
+                        <exclude>Cargo.lock</exclude>
                         <!-- Source-tarball exclude list (data file consumed by check-rat-report.py) -->
                         <exclude>dev/release/rat_exclude_files.txt</exclude>
                     </excludes>
diff --git a/proto/scan_config.proto b/proto/scan_config.proto
new file mode 100644
index 0000000..43593bf
--- /dev/null
+++ b/proto/scan_config.proto
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+syntax = "proto3";
+
+package datafusion_java;
+
+import "csv_read_options.proto";
+import "json_read_options.proto";
+import "parquet_read_options.proto";
+import "avro_read_options.proto";
+import "arrow_read_options.proto";
+
+option java_package = "org.apache.datafusion.protobuf";
+option java_multiple_files = true;
+
+// Provider configuration carried in the `options` byte blob of the plain-C
+// scan ABI (`df_scan_schema` / `df_scan_create`). The ABI itself treats these
+// bytes as opaque; the registered provider builder named by `provider` decodes
+// them. This message is the encoding the in-tree builders agree on -- a custom
+// builder may ignore it and define its own.
+//
+// `provider` selects the registered builder (e.g. "datafusion.listing",
+// "datafusion.memory"). `source` carries that builder's parameters; `custom`
+// is an escape hatch for builders that define their own wire format.
+message ScanConfig {
+  string provider = 1;
+
+  oneof source {
+    ListingSource listing = 2;
+    bytes custom = 15;
+  }
+}
+
+// A file-backed listing source: one or more paths/URIs read with a single
+// file format. Mirrors DataFusion's ListingTable inputs. Object-store
+// credentials/endpoints are configured out of band (registered on the context
+// by the embedding cdylib), not here.
+message ListingSource {
+  // Files or directories. Globs and object-store URIs (s3://, gs://, ...) are
+  // allowed where the registered object store supports them.
+  repeated string paths = 1;
+
+  // The file format and its read options. Reuses the existing per-format
+  // option messages so encoders are shared with the rest of the binding.
+  oneof format {
+    CsvReadOptionsProto csv = 2;
+    NdJsonReadOptionsProto json = 3;
+    ParquetReadOptionsProto parquet = 4;
+    AvroReadOptionsProto avro = 5;
+    ArrowReadOptionsProto arrow = 6;
+  }
+
+  // Optional explicit schema as Arrow IPC schema-message bytes. Unset lets the
+  // provider infer it (e.g. from Parquet metadata or by sampling).
+  optional bytes schema_ipc = 7;
+}
+
+// Per-partition slice descriptor carried in the `partition` byte blob. Empty
+// for a whole-table scan. `index` lets the driver hand each executor task its
+// slice; `opaque` is builder-defined (e.g. a serialized file group), letting a
+// provider partition however it likes without the ABI knowing the shape.
+message ScanPartition {
+  uint32 index = 1;
+  bytes opaque = 2;
+}
diff --git a/proto/scan_request.proto b/proto/scan_request.proto
new file mode 100644
index 0000000..1770ee1
--- /dev/null
+++ b/proto/scan_request.proto
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+syntax = "proto3";
+
+package datafusion_java;
+
+option java_package = "org.apache.datafusion.protobuf";
+option java_multiple_files = true;
+
+// The pushdown a query engine (Spark DataSourceV2, etc.) captures for a scan.
+//
+// This is the *staging* object the engine populates during planning. It maps
+// onto the typed arguments of `df_scan_create` rather than being passed as a
+// single blob: the JNI shim / FFM layer decodes a ScanRequest and explodes it
+// into the call's `projection` / `filters` / `target_partitions` / ...
+// arguments. Keeping the C ABI's arguments typed (not one opaque protobuf)
+// keeps it FFM-friendly and language-neutral; this message just gives the
+// engine one structured thing to build and serialize across its own layers
+// (e.g. driver -> executor task) before the shim makes the native call.
+//
+// It is deliberately NOT the provider config: which provider and its
+// parameters live in ScanConfig (the `options` blob). A ScanRequest is purely
+// "given that provider, here is what to read."
+message ScanRequest {
+  // Pruned columns to project, by name. Empty selects all columns. Names
+  // match the provider's (pre-widening) output schema.
+  repeated string projection = 1;
+
+  // Pushed filters, each a serialized `datafusion.LogicalExprNode` (the same
+  // encoding `datafusion-ffi` uses). The engine translates whichever of its
+  // own predicates it can express and leaves the rest for itself to apply.
+  // The provider receives them as a conjunction (AND).
+  repeated bytes filters = 2;
+
+  // Optional row limit pushed into the scan. Unset means no limit. Advisory:
+  // the engine must still enforce its own limit, since not every plan honors
+  // it exactly.
+  optional uint64 limit = 3;
+
+  // Execution tuning resolved once on the driver and shipped to every executor
+  // so partition counts stay deterministic. <= 0 leaves the DataFusion
+  // default in place (matches the C ABI's convention).
+  int32 target_partitions = 4;
+  int32 batch_size = 5;
+
+  // Session config overrides applied to the scan's private context, e.g.
+  // {"datafusion.execution.parquet.pushdown_filters": "true"}. Resolved on the
+  // driver alongside the tuning above.
+  map<string, string> config_overrides = 6;
+}
diff --git a/spark/pom.xml b/spark/pom.xml
new file mode 100644
index 0000000..26af4f1
--- /dev/null
+++ b/spark/pom.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <parent>
+        <groupId>org.apache.datafusion</groupId>
+        <artifactId>datafusion-java-parent</artifactId>
+        <version>0.2.0-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>datafusion-spark</artifactId>
+    <name>DataFusion Spark DataSource</name>
+    <description>A Spark DataSourceV2 backed by a DataFusion TableProvider via the plain-C scan ABI.</description>
+
+    <properties>
+        <spark.version>4.0.0</spark.version>
+        <scala.binary.version>2.13</scala.binary.version>
+        <!-- The Arrow the target Spark ships. We compile against it but do not
+             bundle it: at runtime the cluster's Arrow is the single arrow-java
+             in the executor JVM, shared by our stream import AND Spark's
+             ArrowColumnVector -> zero-copy columnar. -->
+        <spark.arrow.version>18.1.0</spark.arrow.version>
+    </properties>
+
+    <dependencies>
+        <!-- The JVM scan API (DatafusionScan) + generated protobuf classes.
+             Arrow is excluded so it does not drag datafusion-java's own Arrow
+             19 onto this module's classpath; the Spark-provided Arrow is the
+             single version here. (datafusion-java itself stays on Arrow 19 for
+             its standalone use; only its Arrow transitive is dropped here.) -->
+        <dependency>
+            <groupId>org.apache.datafusion</groupId>
+            <artifactId>datafusion-java</artifactId>
+            <version>${project.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>org.apache.arrow</groupId>
+                    <artifactId>*</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+
+        <!-- Spark. `provided`: the host cluster supplies it (and its Arrow) at
+             runtime, and it is on the test classpath for the embedded
+             SparkSession. Brings arrow-vector / arrow-memory transitively. -->
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-sql_${scala.binary.version}</artifactId>
+            <version>${spark.version}</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <!-- Arrow C Data interface (Data.importArrayStream, ArrowArrayStream).
+             Not bundled by Spark, so declared explicitly, but `provided`: it
+             must match whatever Arrow the cluster runs. Pinned to the target
+             Spark's Arrow as the compile baseline. -->
+        <dependency>
+            <groupId>org.apache.arrow</groupId>
+            <artifactId>arrow-c-data</artifactId>
+            <version>${spark.arrow.version}</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <!-- Test -->
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <configuration>
+                    <!-- Spark on Java 17 needs these module opens; java.library.path points
+                         at Cargo's debug output so tests can load datafusion_scan_jni. -->
+                    <argLine>
+                        -Djava.library.path=${maven.multiModuleProjectDirectory}/rust-target/debug
+                        --add-opens=java.base/java.lang=ALL-UNNAMED
+                        --add-opens=java.base/java.lang.invoke=ALL-UNNAMED
+                        --add-opens=java.base/java.io=ALL-UNNAMED
+                        --add-opens=java.base/java.net=ALL-UNNAMED
+                        --add-opens=java.base/java.nio=ALL-UNNAMED
+                        --add-opens=java.base/java.util=ALL-UNNAMED
+                        --add-opens=java.base/java.util.concurrent=ALL-UNNAMED
+                        --add-opens=java.base/sun.nio.ch=ALL-UNNAMED
+                        --add-opens=java.base/sun.security.action=ALL-UNNAMED
+                    </argLine>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java
new file mode 100644
index 0000000..7dbb27b
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.datafusion.scan.DatafusionScan;
+import org.apache.spark.sql.connector.read.PartitionReader;
+import org.apache.spark.sql.vectorized.ArrowColumnVector;
+import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.sql.vectorized.ColumnarBatch;
+
+/**
+ * Reads one scan partition as Spark {@link ColumnarBatch}es, zero-copy.
+ *
+ * <p>The Arrow vectors imported from the native stream are wrapped directly in Spark {@link
+ * ArrowColumnVector}s -- no per-cell copy. This requires the executor JVM to have a single
+ * arrow-java (the cluster's Spark Arrow); the connector compiles against that version and never
+ * bundles its own, so our import and Spark's {@code ArrowColumnVector} share the same classes.
+ *
+ * <p>Lifecycle: the underlying Arrow vectors are owned by the {@link ArrowReader}. We do not close
+ * the {@link ColumnarBatch} (which would close those vectors a second time); {@link #close()}
+ * closes the reader -- freeing the vectors once -- then the scan, and finally the allocator.
+ */
+final class DatafusionColumnarPartitionReader implements PartitionReader<ColumnarBatch> {
+
+  private final BufferAllocator allocator;
+  private final DatafusionScan scan;
+  private final ArrowReader reader;
+  private final VectorSchemaRoot root;
+  private final ColumnarBatch batch;
+
+  /**
+   * Opens the scan for {@code partition}. If any step fails, whatever was already opened is
+   * released before the exception propagates.
+   */
+  DatafusionColumnarPartitionReader(DatafusionInputPartition partition) {
+    this.allocator = new RootAllocator();
+    // Track what has been opened in locals: the final fields cannot be read in the catch
+    // blocks, and a failure part-way through must still release the scan and reader.
+    DatafusionScan openedScan = null;
+    ArrowReader openedReader = null;
+    try {
+      openedScan =
+          DatafusionScan.create(partition.provider, partition.config, partition.scanRequest);
+      openedReader = openedScan.executePartition(allocator, partition.index);
+      this.root = openedReader.getVectorSchemaRoot();
+      this.batch = new ColumnarBatch(wrap(root));
+    } catch (IOException e) {
+      releaseAfterFailedOpen(e, openedReader, openedScan, allocator);
+      throw new RuntimeException("failed to open scan partition " + partition.index, e);
+    } catch (RuntimeException e) {
+      releaseAfterFailedOpen(e, openedReader, openedScan, allocator);
+      throw e;
+    }
+    this.scan = openedScan;
+    this.reader = openedReader;
+  }
+
+  /**
+   * Releases whatever a failed constructor managed to open, innermost first. Secondary close
+   * failures are attached to {@code cause} as suppressed so the original error stays primary.
+   */
+  private static void releaseAfterFailedOpen(
+      Exception cause, ArrowReader reader, DatafusionScan scan, BufferAllocator allocator) {
+    try {
+      if (reader != null) {
+        reader.close();
+      }
+    } catch (Exception suppressed) {
+      cause.addSuppressed(suppressed);
+    }
+    try {
+      if (scan != null) {
+        scan.close();
+      }
+    } catch (Exception suppressed) {
+      cause.addSuppressed(suppressed);
+    }
+    try {
+      allocator.close();
+    } catch (Exception suppressed) {
+      cause.addSuppressed(suppressed);
+    }
+  }
+
+  /** Wrap each Arrow vector of the (reused) root as a Spark column vector, once. */
+  private static ColumnVector[] wrap(VectorSchemaRoot root) {
+    ColumnVector[] columns = new ColumnVector[root.getFieldVectors().size()];
+    int i = 0;
+    for (FieldVector vector : root.getFieldVectors()) {
+      columns[i++] = new ArrowColumnVector(vector);
+    }
+    return columns;
+  }
+
+  @Override
+  public boolean next() throws IOException {
+    // The root's vectors are reloaded in place each batch; skip empty batches.
+    while (reader.loadNextBatch()) {
+      int rows = root.getRowCount();
+      if (rows > 0) {
+        batch.setNumRows(rows);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public ColumnarBatch get() {
+    return batch;
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      reader.close();
+    } finally {
+      try {
+        scan.close();
+      } finally {
+        allocator.close();
+      }
+    }
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java
new file mode 100644
index 0000000..8152aad
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.spark.sql.connector.read.InputPartition;
+
+/**
+ * A serializable slice of a scan shipped to an executor. Carries only bytes and an index -- never a
+ * native handle, which would be meaningless in another process. The executor rebuilds the provider
+ * from {@code config} and runs partition {@code index}.
+ */
+final class DatafusionInputPartition implements InputPartition {
+
+  private static final long serialVersionUID = 1L;
+
+  final String provider;
+  final byte[] config;
+  final byte[] scanRequest;
+  final int index;
+
+  DatafusionInputPartition(String provider, byte[] config, byte[] scanRequest, int index) {
+    this.provider = provider;
+    this.config = config;
+    this.scanRequest = scanRequest;
+    this.index = index;
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java
new file mode 100644
index 0000000..2442eb2
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.read.InputPartition;
+import org.apache.spark.sql.connector.read.PartitionReader;
+import org.apache.spark.sql.connector.read.PartitionReaderFactory;
+import org.apache.spark.sql.vectorized.ColumnarBatch;
+
+/**
+ * Creates a columnar reader per partition. Serialized to executors, so it holds no state.
+ *
+ * <p>Reads are columnar: {@link #supportColumnarReads} returns true, so Spark calls {@link
+ * #createColumnarReader} and consumes Arrow buffers directly via {@link
+ * DatafusionColumnarPartitionReader}. The row reader is unsupported.
+ */
+final class DatafusionPartitionReaderFactory implements PartitionReaderFactory {
+
+  private static final long serialVersionUID = 1L;
+
+  @Override
+  public boolean supportColumnarReads(InputPartition partition) {
+    return true;
+  }
+
+  @Override
+  public PartitionReader<ColumnarBatch> createColumnarReader(InputPartition partition) {
+    return new DatafusionColumnarPartitionReader((DatafusionInputPartition) partition);
+  }
+
+  @Override
+  public PartitionReader<InternalRow> createReader(InputPartition partition) {
+    throw new UnsupportedOperationException("datafusion source reads are columnar");
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java
new file mode 100644
index 0000000..9cafd37
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.List;
+
+import org.apache.datafusion.protobuf.ScanRequest;
+import org.apache.spark.sql.connector.read.Scan;
+import org.apache.spark.sql.connector.read.ScanBuilder;
+import org.apache.spark.sql.connector.read.SupportsPushDownFilters;
+import org.apache.spark.sql.connector.read.SupportsPushDownLimit;
+import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns;
+import org.apache.spark.sql.sources.Filter;
+import org.apache.spark.sql.types.StructType;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Captures Spark's projection, filter, and limit pushdown, encoding them into the {@code
+ * ScanRequest} the scan ABI consumes.
+ */
+final class DatafusionScanBuilder
+    implements ScanBuilder,
+        SupportsPushDownRequiredColumns,
+        SupportsPushDownFilters,
+        SupportsPushDownLimit {
+
+  private final String provider;
+  private final byte[] config;
+
+  private StructType requiredSchema;
+  private Filter[] pushedFilters = new Filter[0];
+  private List<byte[]> pushedFilterBytes = List.of();
+  private int limit = -1;
+
+  DatafusionScanBuilder(StructType fullSchema, String provider, byte[] config) {
+    this.provider = provider;
+    this.config = config;
+    this.requiredSchema = fullSchema;
+  }
+
+  @Override
+  public void pruneColumns(StructType requiredSchema) {
+    this.requiredSchema = requiredSchema;
+  }
+
+  @Override
+  public Filter[] pushFilters(Filter[] filters) {
+    SparkFilters.Result result = SparkFilters.split(filters);
+    this.pushedFilters = result.pushedFilters();
+    this.pushedFilterBytes = result.pushed();
+    return result.postScan();
+  }
+
+  @Override
+  public Filter[] pushedFilters() {
+    return pushedFilters;
+  }
+
+  @Override
+  public boolean pushLimit(int limit) {
+    // DataFusion applies the limit after filters, and a limited plan coalesces to a
+    // single output partition. isPartiallyPushed() keeps its default (true), so Spark
+    // still re-applies its own LIMIT; returning true only records that it was pushed.
+    this.limit = limit;
+    return true;
+  }
+
+  @Override
+  public Scan build() {
+    ScanRequest.Builder request = ScanRequest.newBuilder();
+    for (String name : requiredSchema.fieldNames()) {
+      request.addProjection(name);
+    }
+    for (byte[] filter : pushedFilterBytes) {
+      request.addFilters(ByteString.copyFrom(filter));
+    }
+    if (limit >= 0) {
+      request.setLimit(limit);
+    }
+    return new DatafusionScanImpl(provider, config, request.build().toByteArray(), requiredSchema);
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java
new file mode 100644
index 0000000..3a48fba
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.datafusion.scan.DatafusionScan;
+import org.apache.spark.sql.connector.read.Batch;
+import org.apache.spark.sql.connector.read.InputPartition;
+import org.apache.spark.sql.connector.read.PartitionReaderFactory;
+import org.apache.spark.sql.connector.read.Scan;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * Spark {@link Scan} and {@link Batch} view of one encoded DataFusion scan request.
+ *
+ * <p>{@link #planInputPartitions()} runs on the driver: it plans once to learn the partition count,
+ * then emits one serializable {@link DatafusionInputPartition} per partition carrying the config +
+ * request bytes (never a native handle). Each executor rebuilds and runs its own partition.
+ */
+final class DatafusionScanImpl implements Scan, Batch {
+
+  private final String provider;
+  private final byte[] config;
+  private final byte[] scanRequest;
+  private final StructType readSchema;
+
+  DatafusionScanImpl(String provider, byte[] config, byte[] scanRequest, StructType readSchema) {
+    this.provider = provider;
+    this.config = config;
+    this.scanRequest = scanRequest;
+    this.readSchema = readSchema;
+  }
+
+  @Override
+  public StructType readSchema() {
+    return readSchema;
+  }
+
+  @Override
+  public Batch toBatch() {
+    return this; // this object is its own Batch
+  }
+
+  @Override
+  public PartitionReaderFactory createReaderFactory() {
+    return new DatafusionPartitionReaderFactory();
+  }
+
+  @Override
+  public InputPartition[] planInputPartitions() {
+    // The scan is opened only to read its partition count and is closed before fanning out.
+    final int count;
+    try (DatafusionScan planned = DatafusionScan.create(provider, config, scanRequest)) {
+      count = planned.partitionCount();
+    }
+    InputPartition[] slices = new InputPartition[count];
+    java.util.Arrays.setAll(
+        slices, index -> new DatafusionInputPartition(provider, config, scanRequest, index));
+    return slices;
+  }
+
+  /** The encoded ScanRequest bytes (the same array, not a copy); used by pushdown unit tests. */
+  byte[] scanRequestBytes() {
+    return scanRequest;
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java
new file mode 100644
index 0000000..d2e8f9d
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.EnumSet;
+import java.util.Set;
+
+import org.apache.spark.sql.connector.catalog.SupportsRead;
+import org.apache.spark.sql.connector.catalog.TableCapability;
+import org.apache.spark.sql.connector.read.ScanBuilder;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/** Batch-readable table over a DataFusion provider; creates {@link DatafusionScanBuilder}s. */
+final class DatafusionTable implements SupportsRead {
+
+  private final String provider;
+  private final byte[] config;
+  private final StructType schema;
+
+  DatafusionTable(StructType schema, String provider, byte[] config) {
+    this.provider = provider;
+    this.config = config;
+    this.schema = schema;
+  }
+
+  @Override
+  public String name() {
+    return "datafusion";
+  }
+
+  @Override
+  public Set<TableCapability> capabilities() {
+    return EnumSet.of(TableCapability.BATCH_READ); // batch reads are the only capability
+  }
+
+  @Override
+  public StructType schema() {
+    return schema;
+  }
+
+  @Override
+  public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) {
+    return new DatafusionScanBuilder(schema, provider, config); // per-scan options are not read
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java
new file mode 100644
index 0000000..5d837d5
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.datafusion.scan.DatafusionScan;
+import org.apache.spark.sql.connector.catalog.Table;
+import org.apache.spark.sql.connector.catalog.TableProvider;
+import org.apache.spark.sql.connector.expressions.Transform;
+import org.apache.spark.sql.sources.DataSourceRegister;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/**
+ * {@link TableProvider} behind {@code spark.read.format("datafusion")}.
+ *
+ * <p>Registered through {@code DataSourceRegister} under the short name {@code datafusion}. Read
+ * options become a provider name plus config bytes via {@link OptionsCodec}; the schema is probed
+ * through {@link DatafusionScan#schema} and converted with {@link SchemaConverter}.
+ */
+public final class DatafusionTableProvider implements TableProvider, DataSourceRegister {
+
+  @Override
+  public String shortName() {
+    return "datafusion";
+  }
+
+  @Override
+  public boolean supportsExternalMetadata() {
+    return false;
+  }
+
+  @Override
+  public StructType inferSchema(CaseInsensitiveStringMap options) {
+    OptionsCodec.Source decoded = OptionsCodec.fromOptions(options);
+    try (BufferAllocator arrowMemory = new RootAllocator()) {
+      Schema probed = DatafusionScan.schema(arrowMemory, decoded.provider(), decoded.config());
+      return SchemaConverter.toSparkSchema(probed);
+    }
+  }
+
+  @Override
+  public Table getTable(
+      StructType schema, Transform[] partitioning, Map<String, String> properties) {
+    CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(properties);
+    OptionsCodec.Source decoded = OptionsCodec.fromOptions(options);
+    return new DatafusionTable(schema, decoded.provider(), decoded.config());
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java b/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java
new file mode 100644
index 0000000..7aaed5c
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.Locale;
+
+import org.apache.datafusion.protobuf.CsvReadOptionsProto;
+import org.apache.datafusion.protobuf.ListingSource;
+import org.apache.datafusion.protobuf.NdJsonReadOptionsProto;
+import org.apache.datafusion.protobuf.ParquetReadOptionsProto;
+import org.apache.datafusion.protobuf.ScanConfig;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/**
+ * Builds the serialized {@code ScanConfig} for the {@code datafusion.listing} provider from Spark
+ * read options.
+ *
+ * <p>Options: {@code path} (required); {@code format} ({@code csv|parquet|json}, matched
+ * case-insensitively; when absent it is guessed from a {@code .parquet}/{@code .json} path suffix,
+ * otherwise {@code csv}); and, for CSV only, {@code header} (default true) and {@code delimiter}
+ * (default {@code ,}). Each format sets a fixed file extension: .csv, .parquet or .json.
+ */
+final class OptionsCodec {
+
+  static final String PROVIDER = "datafusion.listing";
+
+  private OptionsCodec() {}
+
+  /** The provider name plus the serialized ScanConfig the listing builder decodes. */
+  record Source(String provider, byte[] config) {}
+
+  /**
+   * Decodes {@code options} into a {@link Source}.
+   *
+   * @throws IllegalArgumentException if {@code path} is missing or empty, the format is not one
+   *     of the three supported names, or the CSV delimiter is not exactly one character
+   */
+  static Source fromOptions(CaseInsensitiveStringMap options) {
+    String location = options.get("path");
+    if (location == null || location.isEmpty()) {
+      throw new IllegalArgumentException("the 'datafusion' source requires a 'path' option");
+    }
+    String format = options.containsKey("format") ? options.get("format") : inferFormat(location);
+
+    ListingSource.Builder listing = ListingSource.newBuilder().addPaths(location);
+    switch (format.toLowerCase(Locale.ROOT)) {
+      case "csv" -> listing.setCsv(csvOptions(options));
+      case "parquet" ->
+          listing.setParquet(
+              ParquetReadOptionsProto.newBuilder().setFileExtension(".parquet").build());
+      case "json" ->
+          listing.setJson(NdJsonReadOptionsProto.newBuilder().setFileExtension(".json").build());
+      default -> throw new IllegalArgumentException("unsupported format: " + format);
+    }
+
+    ScanConfig scanConfig =
+        ScanConfig.newBuilder().setProvider(PROVIDER).setListing(listing.build()).build();
+    return new Source(PROVIDER, scanConfig.toByteArray());
+  }
+
+  /** CSV reader settings: header flag first, then delimiter, with a fixed {@code "} quote. */
+  private static CsvReadOptionsProto csvOptions(CaseInsensitiveStringMap options) {
+    return CsvReadOptionsProto.newBuilder()
+        .setHasHeader(options.getBoolean("header", true))
+        .setDelimiter(delimiter(options))
+        .setQuote('"')
+        .setFileExtension(".csv")
+        .build();
+  }
+
+  private static int delimiter(CaseInsensitiveStringMap options) {
+    String raw = options.containsKey("delimiter") ? options.get("delimiter") : ",";
+    if (raw.length() == 1) {
+      return raw.charAt(0);
+    }
+    throw new IllegalArgumentException("delimiter must be a single character, got: " + raw);
+  }
+
+  private static String inferFormat(String path) {
+    String lowered = path.toLowerCase(Locale.ROOT);
+    return lowered.endsWith(".parquet") ? "parquet" : lowered.endsWith(".json") ? "json" : "csv";
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java b/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java
new file mode 100644
index 0000000..d61d9c4
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * Converts an Arrow schema (produced by the scan ABI) into a Spark {@link StructType}.
+ *
+ * <p>Done directly rather than through Spark's {@code ArrowUtils} so the connector depends only on
+ * our Arrow version, never Spark's bundled one. Maps signed 8/16/32/64-bit ints, single and double
+ * precision floats, (Large)Utf8 and Bool; anything else, including float16, fails fast.
+ */
+final class SchemaConverter {
+
+  private SchemaConverter() {}
+
+  static StructType toSparkSchema(Schema arrowSchema) {
+    StructType struct = new StructType();
+    for (Field field : arrowSchema.getFields()) {
+      struct = struct.add(field.getName(), toSparkType(field), field.isNullable());
+    }
+    return struct;
+  }
+
+  static DataType toSparkType(Field field) {
+    ArrowType type = field.getType();
+    // Unsigned ints skip this branch and, like every unknown type, reach the final throw.
+    if (type instanceof ArrowType.Int i && i.getIsSigned()) {
+      return switch (i.getBitWidth()) {
+        case 8 -> DataTypes.ByteType;
+        case 16 -> DataTypes.ShortType;
+        case 32 -> DataTypes.IntegerType;
+        case 64 -> DataTypes.LongType;
+        default -> throw unsupported(field);
+      };
+    }
+    if (type instanceof ArrowType.FloatingPoint fp) {
+      FloatingPointPrecision kind = fp.getPrecision();
+      if (kind == FloatingPointPrecision.HALF) {
+        throw unsupported(field); // float16 is not Spark's 32-bit FloatType: keep failing fast
+      }
+      return kind == FloatingPointPrecision.DOUBLE ? DataTypes.DoubleType : DataTypes.FloatType;
+    }
+    if (type instanceof ArrowType.Utf8 || type instanceof ArrowType.LargeUtf8) {
+      return DataTypes.StringType;
+    }
+    if (type instanceof ArrowType.Bool) {
+      return DataTypes.BooleanType;
+    }
+    throw unsupported(field);
+  }
+
+  private static IllegalArgumentException unsupported(Field field) {
+    return new IllegalArgumentException(
+        "unsupported Arrow type for column '" + field.getName() + "': " + field.getType());
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java b/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java
new file mode 100644
index 0000000..39be5c6
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.datafusion.protobuf.BinaryExprNode;
+import org.apache.datafusion.protobuf.IsNotNull;
+import org.apache.datafusion.protobuf.IsNull;
+import org.apache.datafusion.protobuf.LogicalExprNode;
+import org.apache.datafusion.protobuf.Not;
+import org.apache.spark.sql.sources.And;
+import org.apache.spark.sql.sources.EqualTo;
+import org.apache.spark.sql.sources.Filter;
+import org.apache.spark.sql.sources.GreaterThan;
+import org.apache.spark.sql.sources.GreaterThanOrEqual;
+import org.apache.spark.sql.sources.LessThan;
+import org.apache.spark.sql.sources.LessThanOrEqual;
+import org.apache.spark.sql.sources.Or;
+
+import datafusion_common.DatafusionCommon.Column;
+import datafusion_common.DatafusionCommon.ScalarValue;
+
+/**
+ * Translates Spark {@link Filter}s into serialized {@code datafusion.LogicalExprNode} bytes for
+ * filter pushdown.
+ *
+ * <p>Translates the comparison, boolean, and null predicates over primitive literals that map
+ * cleanly; anything else is reported as not pushed so Spark applies it itself, as is any filter
+ * whose column reference is nested or backtick-quoted. A translated filter is applied exactly by
+ * DataFusion (the scan core calls {@code DataFrame::filter}), so it is treated as fully handled.
+ */
+final class SparkFilters {
+
+  private SparkFilters() {}
+
+  /** Encoded pushed filters, the Spark filters they came from, and those left to Spark. */
+  record Result(List<byte[]> pushed, Filter[] pushedFilters, Filter[] postScan) {}
+
+  static Result split(Filter[] filters) {
+    List<byte[]> pushed = new ArrayList<>();
+    List<Filter> pushedFilters = new ArrayList<>();
+    List<Filter> postScan = new ArrayList<>();
+    for (Filter filter : filters) {
+      // Spark dot-joins nested paths and backtick-quotes odd names; neither is a plain column.
+      String refs = String.join(" ", filter.references());
+      boolean plain = refs.indexOf('.') < 0 && refs.indexOf('`') < 0;
+      LogicalExprNode expr = plain ? translate(filter) : null;
+      if (expr != null) {
+        pushed.add(expr.toByteArray());
+        pushedFilters.add(filter);
+      } else {
+        postScan.add(filter);
+      }
+    }
+    return new Result(
+        pushed, pushedFilters.toArray(new Filter[0]), postScan.toArray(new Filter[0]));
+  }
+
+  /** Translate a single filter, or return null if it (or any nested child) cannot be expressed. */
+  private static LogicalExprNode translate(Filter filter) {
+    if (filter instanceof EqualTo f) {
+      return binary("Eq", f.attribute(), f.value());
+    }
+    if (filter instanceof GreaterThan f) {
+      return binary("Gt", f.attribute(), f.value());
+    }
+    if (filter instanceof GreaterThanOrEqual f) {
+      return binary("GtEq", f.attribute(), f.value());
+    }
+    if (filter instanceof LessThan f) {
+      return binary("Lt", f.attribute(), f.value());
+    }
+    if (filter instanceof LessThanOrEqual f) {
+      return binary("LtEq", f.attribute(), f.value());
+    }
+    if (filter instanceof org.apache.spark.sql.sources.IsNull f) {
+      return wrap(b -> b.setIsNullExpr(IsNull.newBuilder().setExpr(column(f.attribute()))));
+    }
+    if (filter instanceof org.apache.spark.sql.sources.IsNotNull f) {
+      return wrap(b -> b.setIsNotNullExpr(IsNotNull.newBuilder().setExpr(column(f.attribute()))));
+    }
+    if (filter instanceof And f) {
+      LogicalExprNode l = translate(f.left());
+      LogicalExprNode r = translate(f.right());
+      return (l == null || r == null) ? null : binaryNodes("And", l, r);
+    }
+    if (filter instanceof Or f) {
+      LogicalExprNode l = translate(f.left());
+      LogicalExprNode r = translate(f.right());
+      return (l == null || r == null) ? null : binaryNodes("Or", l, r);
+    }
+    if (filter instanceof org.apache.spark.sql.sources.Not f) {
+      LogicalExprNode child = translate(f.child());
+      return child == null ? null : wrap(b -> b.setNotExpr(Not.newBuilder().setExpr(child)));
+    }
+    return null;
+  }
+
+  private static LogicalExprNode binary(String op, String attribute, Object value) {
+    ScalarValue literal = scalar(value);
+    if (literal == null) {
+      return null;
+    }
+    return binaryNodes(
+        op, column(attribute), LogicalExprNode.newBuilder().setLiteral(literal).build());
+  }
+
+  private static LogicalExprNode binaryNodes(
+      String op, LogicalExprNode left, LogicalExprNode right) {
+    return LogicalExprNode.newBuilder()
+        .setBinaryExpr(BinaryExprNode.newBuilder().addOperands(left).addOperands(right).setOp(op))
+        .build();
+  }
+
+  private static LogicalExprNode column(String attribute) {
+    return LogicalExprNode.newBuilder().setColumn(Column.newBuilder().setName(attribute)).build();
+  }
+
+  private static LogicalExprNode wrap(
+      java.util.function.UnaryOperator<LogicalExprNode.Builder> filler) {
+    return filler.apply(LogicalExprNode.newBuilder()).build();
+  }
+
+  /** Map a Spark literal to a DataFusion ScalarValue, or null if unsupported. */
+  private static ScalarValue scalar(Object value) {
+    if (value instanceof Long v) {
+      return ScalarValue.newBuilder().setInt64Value(v).build();
+    }
+    if (value instanceof Integer v) {
+      return ScalarValue.newBuilder().setInt32Value(v).build();
+    }
+    if (value instanceof Double v) {
+      return ScalarValue.newBuilder().setFloat64Value(v).build();
+    }
+    if (value instanceof Float v) {
+      return ScalarValue.newBuilder().setFloat32Value(v).build();
+    }
+    if (value instanceof Boolean v) {
+      return ScalarValue.newBuilder().setBoolValue(v).build();
+    }
+    if (value instanceof String v) {
+      return ScalarValue.newBuilder().setUtf8Value(v).build();
+    }
+    return null;
+  }
+}
diff --git a/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
new file mode 100644
index 0000000..fd603b1
--- /dev/null
+++ b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -0,0 +1 @@
+org.apache.datafusion.spark.DatafusionTableProvider
diff --git a/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java b/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java
new file mode 100644
index 0000000..b1695ba
--- /dev/null
+++ b/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.List;
+
+import org.apache.datafusion.protobuf.ScanRequest;
+import org.apache.spark.sql.connector.read.SupportsPushDownFilters;
+import org.apache.spark.sql.sources.Filter;
+import org.apache.spark.sql.sources.GreaterThanOrEqual;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructType;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Checks, without a SparkSession, that each pushdown hook on {@link DatafusionScanBuilder} lands in
+ * the built scan's {@code ScanRequest} bytes (Spark's own limit/filter handling would mask it).
+ */
+class DatafusionScanBuilderTest {
+
+  private static final StructType SCHEMA =
+      new StructType().add("id", DataTypes.LongType).add("name", DataTypes.StringType);
+
+  private DatafusionScanBuilder builder() {
+    return new DatafusionScanBuilder(SCHEMA, "datafusion.listing", new byte[0]);
+  }
+
+  private static ScanRequest decode(org.apache.spark.sql.connector.read.Scan scan)
+      throws Exception {
+    return ScanRequest.parseFrom(((DatafusionScanImpl) scan).scanRequestBytes());
+  }
+
+  @Test
+  void noLimitWhenNotPushed() throws Exception {
+    ScanRequest encoded = decode(builder().build());
+    assertFalse(encoded.hasLimit(), "limit must be unset when Spark pushes none");
+  }
+
+  @Test
+  void pushesLimit() throws Exception {
+    DatafusionScanBuilder underTest = builder();
+    assertTrue(underTest.pushLimit(7), "limit should be reported as fully pushed");
+    ScanRequest encoded = decode(underTest.build());
+    assertTrue(encoded.hasLimit());
+    assertEquals(7L, encoded.getLimit());
+  }
+
+  @Test
+  void pushesProjection() throws Exception {
+    DatafusionScanBuilder underTest = builder();
+    underTest.pruneColumns(new StructType().add("name", DataTypes.StringType));
+    List<String> projected = decode(underTest.build()).getProjectionList();
+    assertEquals(List.of("name"), projected);
+  }
+
+  @Test
+  void pushesComparisonFilter() throws Exception {
+    DatafusionScanBuilder underTest = builder();
+    Filter idAtLeastTwo = new GreaterThanOrEqual("id", 2L);
+    Filter[] leftForSpark =
+        ((SupportsPushDownFilters) underTest).pushFilters(new Filter[] {idAtLeastTwo});
+    assertEquals(0, leftForSpark.length, "a translatable filter should be fully pushed");
+    assertEquals(1, decode(underTest.build()).getFiltersCount());
+  }
+}
diff --git a/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java b/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java
new file mode 100644
index 0000000..4165921
--- /dev/null
+++ b/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.functions;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * Drives the {@code datafusion} data source through a local SparkSession: a small CSV is read via
+ * the connector and comes back as Spark rows. The cases cover schema inference, a full scan,
+ * column projection, limit pushdown, and filter pushdown.
+ */
+class DatafusionSourceTest {
+
+  private static SparkSession spark;
+
+  @TempDir static Path tmp;
+
+  @BeforeAll
+  static void startSpark() {
+    spark =
+        SparkSession.builder()
+            .appName("datafusion-source-test")
+            .master("local[2]")
+            .config("spark.sql.shuffle.partitions", "2")
+            .config("spark.ui.enabled", "false")
+            .getOrCreate();
+  }
+
+  @AfterAll
+  static void stopSpark() {
+    if (spark != null) { // the field is only ever assigned by startSpark
+      spark.stop();
+    }
+  }
+
+  private Dataset<Row> read() throws Exception {
+    // The fixture is rewritten on every call, so each test reads the same three-row file.
+    Path file = Files.writeString(tmp.resolve("data.csv"), "id,name\n1,a\n2,b\n3,c\n");
+    return spark
+        .read()
+        .format("datafusion")
+        .option("format", "csv")
+        .option("path", file.toString())
+        .load();
+  }
+
+  @Test
+  void inferredSchema() throws Exception {
+    String[] columns = read().schema().fieldNames();
+    assertEquals(List.of("id", "name"), Arrays.asList(columns));
+  }
+
+  @Test
+  void fullScanReturnsAllRows() throws Exception {
+    assertEquals(3, read().count());
+  }
+
+  @Test
+  void projectionSelectsColumns() throws Exception {
+    Dataset<Row> onlyNames = read().select("name");
+    assertEquals(3, onlyNames.count());
+    assertEquals(List.of("name"), Arrays.asList(onlyNames.schema().fieldNames()));
+  }
+
+  @Test
+  void limitPushdownCapsRows() throws Exception {
+    assertEquals(2, read().limit(2).count());
+  }
+
+  @Test
+  void filterPushdownReducesRows() throws Exception {
+    Dataset<Row> kept = read().filter(functions.col("id").geq(2));
+    assertEquals(2, kept.count());
+
+    List<Long> ids = kept.select("id").as(org.apache.spark.sql.Encoders.LONG()).collectAsList();
+    assertTrue(ids.stream().noneMatch(id -> id < 2), "all surviving ids should be >= 2");
+    assertEquals(2L + 3L, ids.stream().mapToLong(Long::longValue).sum());
+  }
+}