+ value="${maven.multiModuleProjectDirectory}/rust-target/${datafusion.native.profile}/${datafusion.lib.filename}"/>
+
diff --git a/core/src/main/java/org/apache/datafusion/SessionContext.java b/core/src/main/java/org/apache/datafusion/SessionContext.java
index ec0bd85..b68cda5 100644
--- a/core/src/main/java/org/apache/datafusion/SessionContext.java
+++ b/core/src/main/java/org/apache/datafusion/SessionContext.java
@@ -113,10 +113,11 @@ public DataFrame fromProto(byte[] planBytes) {
* other Substrait-emitting tool — and hand them to DataFusion without round-tripping through SQL.
*
* Substrait support is gated behind the {@code substrait} Cargo feature on the native crate
- * and is off by default. Rebuild the native crate with {@code cargo build
- * --features substrait} (or {@code cargo build --features substrait,protoc} for hermetic builds
- * that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native binary
- * built without the feature, this method throws {@link RuntimeException} pointing at the flag.
+ * and is off by default. Rebuild the native crate with {@code cargo build -p
+ * datafusion-jni --features substrait} (or {@code ... --features substrait,protoc} for hermetic
+ * builds that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native
+ * binary built without the feature, this method throws {@link RuntimeException} pointing at the
+ * flag.
*
* @throws IllegalArgumentException if {@code planBytes} is {@code null}.
* @throws IllegalStateException if this context is closed.
@@ -183,7 +184,7 @@ public MemoryUsage memoryUsage() {
* Rebuild with:
*
* <pre>{@code
- * RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics
+ * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
* }</pre>
*
* If invoked against a native binary built without the feature, this method throws {@link
diff --git a/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java b/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java
new file mode 100644
index 0000000..6a2d43b
--- /dev/null
+++ b/core/src/main/java/org/apache/datafusion/scan/DatafusionScan.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+import org.apache.arrow.c.ArrowArrayStream;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.CDataDictionaryProvider;
+import org.apache.arrow.c.Data;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * A planned scan over a DataFusion {@code TableProvider}, driven through the plain-C scan ABI.
+ *
+ * <p>This is the JVM-facing wrapper over {@link NativeScan}. Each scanned partition is returned as
+ * an {@link ArrowReader} imported from a native {@code FFI_ArrowArrayStream} through the Arrow C
+ * Stream interface, so record batches never pass through JNI -- they cross via the Arrow C Data
+ * interface that arrow-java already speaks. This mirrors {@code DataFrame#collect}.
+ *
+ * <p>The provider and its parameters are supplied as a serialized {@code ScanConfig}; pushed-down
+ * projection/filters/tuning as a serialized {@code ScanRequest}. Both are built with the generated
+ * protobuf classes in {@code org.apache.datafusion.protobuf}.
+ *
+ * <p>Not thread-safe with respect to {@link #close()}: callers must not close a scan while a
+ * partition execute is in flight on another thread.
+ */
+public final class DatafusionScan implements AutoCloseable {
+
+  /** Opaque handle returned by {@code NativeScan.createScan}; released by {@link #close()}. */
+  private final long handle;
+
+  /** Set by {@link #close()}; only read or written while holding this object's monitor. */
+  private boolean closed;
+
+  private DatafusionScan(long handle) {
+    this.handle = handle;
+  }
+
+  /**
+   * Probe a provider's output schema without planning a scan.
+   *
+   * @param allocator allocator for the transient C schema struct
+   * @param provider registered builder name (e.g. {@code datafusion.listing})
+   * @param config serialized {@code ScanConfig}
+   * @return a schema whose fields are the children of the imported struct-typed root
+   */
+  public static Schema schema(BufferAllocator allocator, String provider, byte[] config) {
+    // try-with-resources so that a failing native probe or import cannot leak the C
+    // struct or the dictionary provider.
+    try (ArrowSchema cSchema = ArrowSchema.allocateNew(allocator);
+        CDataDictionaryProvider dictionaries = new CDataDictionaryProvider()) {
+      NativeScan.providerSchema(provider, config, cSchema.memoryAddress());
+      // importField takes ownership of the C struct and returns the struct-typed
+      // root; its children are the table's columns. Closing the struct again on
+      // exit from the try block is a no-op.
+      Field root = Data.importField(allocator, cSchema, dictionaries);
+      return new Schema(root.getChildren());
+    }
+  }
+
+  /**
+   * Plan a scan over {@code provider}.
+   *
+   * @param provider registered builder name
+   * @param config serialized {@code ScanConfig}
+   * @param scanRequest serialized {@code ScanRequest}, or {@code null}/empty for no pushdown
+   * @return a scan wrapping the native handle; close it to release the handle
+   */
+  public static DatafusionScan create(String provider, byte[] config, byte[] scanRequest) {
+    // The native binding expects an empty array, not null, for "no pushdown".
+    byte[] request = scanRequest == null ? new byte[0] : scanRequest;
+    return new DatafusionScan(NativeScan.createScan(provider, config, request));
+  }
+
+  /**
+   * Number of output partitions this scan produces.
+   *
+   * @throws IllegalStateException if this scan is closed
+   */
+  public int partitionCount() {
+    return NativeScan.partitionCount(openHandle());
+  }
+
+  /**
+   * Execute one partition. The returned {@link ArrowReader} owns the underlying stream; close it
+   * when done. Safe to call concurrently for distinct partitions.
+   *
+   * @param allocator allocator for the C stream struct and the imported batches
+   * @param partition index of the partition to execute
+   * @throws IllegalStateException if this scan is closed
+   */
+  public ArrowReader executePartition(BufferAllocator allocator, int partition) {
+    long scan = openHandle();
+    // The struct is freed on every path: if the native call throws, nothing else
+    // would release it; on success the reader has already moved the stream out.
+    try (ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) {
+      NativeScan.executeStreamPartition(scan, partition, stream.memoryAddress());
+      return Data.importArrayStream(allocator, stream);
+    }
+  }
+
+  /**
+   * Execute the whole plan as a single coalesced reader. The returned {@link ArrowReader} owns
+   * the underlying stream; close it when done.
+   *
+   * @param allocator allocator for the C stream struct and the imported batches
+   * @throws IllegalStateException if this scan is closed
+   */
+  public ArrowReader execute(BufferAllocator allocator) {
+    long scan = openHandle();
+    // Same ownership handling as executePartition: free the struct even on failure.
+    try (ArrowArrayStream stream = ArrowArrayStream.allocateNew(allocator)) {
+      NativeScan.executeStream(scan, stream.memoryAddress());
+      return Data.importArrayStream(allocator, stream);
+    }
+  }
+
+  /** Release the native scan. Idempotent: only the first call reaches the native side. */
+  @Override
+  public synchronized void close() {
+    if (closed) {
+      return;
+    }
+    closed = true;
+    NativeScan.closeScan(handle);
+  }
+
+  /**
+   * Return the native handle, refusing to hand it out once {@link #close()} has run so a released
+   * handle is never passed back to native code. This guards against use after close; it does not
+   * make a close that races with an in-flight call safe.
+   */
+  private synchronized long openHandle() {
+    if (closed) {
+      throw new IllegalStateException("DatafusionScan is closed");
+    }
+    return handle;
+  }
+}
diff --git a/core/src/main/java/org/apache/datafusion/scan/NativeScan.java b/core/src/main/java/org/apache/datafusion/scan/NativeScan.java
new file mode 100644
index 0000000..31093d4
--- /dev/null
+++ b/core/src/main/java/org/apache/datafusion/scan/NativeScan.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+/**
+ * Raw native bindings to the {@code datafusion_scan_jni} shim.
+ *
+ * <p>Every method is a thin pass-through to the in-process scan core. Arrow data is never marshaled
+ * across this boundary: the {@code *Addr} arguments are the memory addresses of {@code
+ * org.apache.arrow.c.ArrowSchema} / {@code ArrowArrayStream} structs allocated by arrow-java, which
+ * the native side fills in place. Callers should use {@link DatafusionScan} rather than these
+ * directly.
+ */
+final class NativeScan {
+
+ // Runs once when this class is initialized, so the shim is loaded before any of the
+ // native methods below can be invoked.
+ static {
+ ScanNativeLoader.load();
+ }
+
+ // Static-only holder of native entry points; never instantiated.
+ private NativeScan() {}
+
+ /**
+ * Probe a provider's output schema into the {@code ArrowSchema} at {@code schemaAddr}.
+ *
+ * @param provider registered builder name (e.g. {@code datafusion.listing})
+ * @param config serialized {@code ScanConfig}
+ * @param schemaAddr memory address of an {@code ArrowSchema} struct allocated by arrow-java,
+ * which the native side fills in place
+ */
+ static native void providerSchema(String provider, byte[] config, long schemaAddr);
+
+ /**
+ * Plan a scan. Returns an opaque handle; release it with {@link #closeScan(long)}.
+ *
+ * @param provider registered builder name (e.g. {@code datafusion.listing})
+ * @param config serialized {@code ScanConfig}
+ * @param scanRequest serialized {@code ScanRequest} (pushdown), or empty for none
+ * @return opaque handle to pass to the other methods of this class
+ */
+ static native long createScan(String provider, byte[] config, byte[] scanRequest);
+
+ /**
+ * Output partition count of a planned scan.
+ *
+ * @param handle handle returned by {@link #createScan(String, byte[], byte[])}
+ */
+ static native int partitionCount(long handle);
+
+ /**
+ * Execute one partition into the {@code ArrowArrayStream} at {@code streamAddr}.
+ *
+ * @param handle handle returned by {@link #createScan(String, byte[], byte[])}
+ * @param partition index of the partition to execute
+ * @param streamAddr memory address of an {@code ArrowArrayStream} struct allocated by
+ * arrow-java, which the native side fills in place
+ */
+ static native void executeStreamPartition(long handle, int partition, long streamAddr);
+
+ /**
+ * Execute the whole plan as one coalesced stream into {@code streamAddr}.
+ *
+ * @param handle handle returned by {@link #createScan(String, byte[], byte[])}
+ * @param streamAddr memory address of an {@code ArrowArrayStream} struct allocated by
+ * arrow-java, which the native side fills in place
+ */
+ static native void executeStream(long handle, long streamAddr);
+
+ /**
+ * Drop a planned scan. Null-safe.
+ *
+ * <p>NOTE(review): "null-safe" presumably means a zero handle is ignored; confirm against the
+ * native shim.
+ *
+ * @param handle handle returned by {@link #createScan(String, byte[], byte[])}
+ */
+ static native void closeScan(long handle);
+}
diff --git a/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java b/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java
new file mode 100644
index 0000000..6540ce4
--- /dev/null
+++ b/core/src/main/java/org/apache/datafusion/scan/ScanNativeLoader.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+/**
+ * Loads the {@code datafusion_scan_jni} shim library.
+ *
+ * <p>This is the JVM adapter over the plain-C scan ABI exported by {@code
+ * datafusion-scan-ffi}. The library is loaded from {@code java.library.path} (set it with {@code
+ * -Djava.library.path=...} or the platform library-path environment variable so it can find the
+ * built {@code libdatafusion_scan_jni}). Classpath bundling, as the core {@code datafusion_jni}
+ * library does, is left to release packaging.
+ */
+final class ScanNativeLoader {
+
+  private static final String LIBRARY_NAME = "datafusion_scan_jni";
+
+  private static volatile boolean loaded;
+
+  private ScanNativeLoader() {}
+
+  /**
+   * Load the shim through {@link System#loadLibrary(String)} unless an earlier call already
+   * succeeded. The flag is set only after the load returns, so a failed attempt is retried on
+   * the next call.
+   */
+  static synchronized void load() {
+    if (!loaded) {
+      System.loadLibrary(LIBRARY_NAME);
+      loaded = true;
+    }
+  }
+}
diff --git a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
index 120d179..d567275 100644
--- a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
+++ b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
@@ -37,7 +37,7 @@
* #checkFeatureEnabled}. Run
*
* <pre>{@code
- * (cd native && RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics)
+ * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
* }</pre>
*
* before {@code ./mvnw test} to exercise this class.
diff --git a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
index 34db3b5..a2cfb0a 100644
--- a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
+++ b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
@@ -50,7 +50,7 @@
*
* The {@code substrait} Cargo feature is off by default in {@code native/Cargo.toml}; if the
* native crate was built without it, every test here is skipped (see {@link #checkFeatureEnabled}).
- * Run {@code (cd native && cargo build --features substrait)} before {@code ./mvnw test} to
+ * Run {@code cargo build -p datafusion-jni --features substrait} before {@code ./mvnw test} to
* exercise this class.
*/
class SessionContextSubstraitTest {
diff --git a/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java b/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java
new file mode 100644
index 0000000..2cf61f7
--- /dev/null
+++ b/core/src/test/java/org/apache/datafusion/scan/DatafusionScanTest.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.scan;
+
+import static java.util.stream.Collectors.toList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import com.google.protobuf.ByteString;
+import datafusion_common.DatafusionCommon.Column;
+import datafusion_common.DatafusionCommon.ScalarValue;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.datafusion.protobuf.BinaryExprNode;
+import org.apache.datafusion.protobuf.CsvReadOptionsProto;
+import org.apache.datafusion.protobuf.ListingSource;
+import org.apache.datafusion.protobuf.LogicalExprNode;
+import org.apache.datafusion.protobuf.ScanConfig;
+import org.apache.datafusion.protobuf.ScanRequest;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * End-to-end exercise of the JNI shim: drive the {@code datafusion.listing} provider over a CSV
+ * entirely from Java, confirming the Arrow C Stream produced by arrow-rs imports cleanly through
+ * arrow-java's {@code Data.importArrayStream}. This is the proof that the C Stream ABI matches
+ * across the two Arrow implementations through this path.
+ */
+class DatafusionScanTest {
+
+  private static final String PROVIDER = "datafusion.listing";
+
+  /** Per-test scratch directory supplied by JUnit; holds the CSV fixture. */
+  @TempDir Path tmp;
+
+  /** Build a ScanConfig for a CSV listing source, using the generated protobuf builders. */
+  private byte[] csvConfig(String path) {
+    return ScanConfig.newBuilder()
+        .setProvider(PROVIDER)
+        .setListing(
+            ListingSource.newBuilder()
+                .addPaths(path)
+                .setCsv(
+                    CsvReadOptionsProto.newBuilder()
+                        .setHasHeader(true)
+                        .setDelimiter(',')
+                        .setQuote('"')
+                        .setFileExtension(".csv")
+                        .build())
+                .build())
+        .build()
+        .toByteArray();
+  }
+
+  /** The schema probe should report the CSV header columns, in order. */
+  @Test
+  void inferredSchemaMatchesCsvHeader() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+
+    try (BufferAllocator allocator = new RootAllocator()) {
+      Schema schema = DatafusionScan.schema(allocator, PROVIDER, config);
+      List<String> names = schema.getFields().stream().map(Field::getName).collect(toList());
+      assertEquals(List.of("id", "name"), names);
+    }
+  }
+
+  /** An unrestricted scan should return every row, with {@code id} readable as 64-bit ints. */
+  @Test
+  void scansCsvRowsThroughArrowCStream() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, null)) {
+      assertTrue(scan.partitionCount() >= 1, "expected at least one partition");
+
+      long total = 0;
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+          BigIntVector ids = (BigIntVector) root.getVector("id");
+          for (int i = 0; i < root.getRowCount(); i++) {
+            total += ids.get(i);
+          }
+        }
+      }
+      assertEquals(3, rows);
+      assertEquals(1 + 2 + 3, total);
+    }
+  }
+
+  /** A pushed projection should narrow the reader's schema without changing the row count. */
+  @Test
+  void projectionPrunesColumns() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+    // Pushed projection: keep only "name".
+    byte[] request = ScanRequest.newBuilder().addProjection("name").build().toByteArray();
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) {
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        List<String> cols =
+            root.getSchema().getFields().stream().map(Field::getName).collect(toList());
+        assertEquals(List.of("name"), cols, "projection should drop the id column");
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+        }
+      }
+      assertEquals(3, rows);
+    }
+  }
+
+  /** A pushed filter should remove the non-matching rows from the scan output. */
+  @Test
+  void filterPushdownSelectsRows() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+    // Pushed filter: id >= 2.
+    byte[] request =
+        ScanRequest.newBuilder().addFilters(ByteString.copyFrom(idAtLeast(2))).build().toByteArray();
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) {
+      long total = 0;
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+          BigIntVector ids = (BigIntVector) root.getVector("id");
+          for (int i = 0; i < root.getRowCount(); i++) {
+            total += ids.get(i);
+          }
+        }
+      }
+      assertEquals(2, rows, "only id 2 and 3 pass the filter");
+      assertEquals(2 + 3, total);
+    }
+  }
+
+  /** A pushed limit should cap the number of rows the scan returns. */
+  @Test
+  void limitCapsRows() throws Exception {
+    byte[] config = csvConfig(writeCsv());
+    // Pushed limit of 2 over the 3-row CSV.
+    byte[] request = ScanRequest.newBuilder().setLimit(2).build().toByteArray();
+
+    try (BufferAllocator allocator = new RootAllocator();
+        DatafusionScan scan = DatafusionScan.create(PROVIDER, config, request)) {
+      int rows = 0;
+      try (ArrowReader reader = scan.execute(allocator)) {
+        VectorSchemaRoot root = reader.getVectorSchemaRoot();
+        while (reader.loadNextBatch()) {
+          rows += root.getRowCount();
+        }
+      }
+      assertEquals(2, rows, "limit should cap the scan at 2 rows");
+    }
+  }
+
+  /** Serialize the LogicalExprNode for {@code id >= value}, as the engine's filter pushdown would. */
+  private static byte[] idAtLeast(long value) {
+    LogicalExprNode column =
+        LogicalExprNode.newBuilder().setColumn(Column.newBuilder().setName("id")).build();
+    LogicalExprNode literal =
+        LogicalExprNode.newBuilder()
+            .setLiteral(ScalarValue.newBuilder().setInt64Value(value))
+            .build();
+    return LogicalExprNode.newBuilder()
+        .setBinaryExpr(
+            BinaryExprNode.newBuilder().addOperands(column).addOperands(literal).setOp("GtEq"))
+        .build()
+        .toByteArray();
+  }
+
+  /**
+   * Write the shared three-row fixture ({@code id,name} header, ids 1 to 3) to {@code data.csv}
+   * in the temp directory.
+   *
+   * @return the path of the written file, as a string
+   */
+  private String writeCsv() throws Exception {
+    Path csv = tmp.resolve("data.csv");
+    Files.writeString(csv, "id,name\n1,a\n2,b\n3,c\n");
+    return csv.toString();
+  }
+}
diff --git a/dev/release/build-release.sh b/dev/release/build-release.sh
index 2b033bb..4d4ab13 100755
--- a/dev/release/build-release.sh
+++ b/dev/release/build-release.sh
@@ -135,26 +135,28 @@ JVM_TARGET_DIR="$PROJECT_HOME/core/target/classes/org/apache/datafusion"
mkdir -p "$JVM_TARGET_DIR/linux/amd64"
docker cp \
- "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+ "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \
"$JVM_TARGET_DIR/linux/amd64/"
mkdir -p "$JVM_TARGET_DIR/linux/aarch64"
docker cp \
- "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+ "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \
"$JVM_TARGET_DIR/linux/aarch64/"
echo "Building macOS native libs on the host (host=$HOST_ARCH)"
rustup target add "$OTHER_DARWIN_TARGET"
-(cd "$PROJECT_HOME/native" && cargo build --release)
-(cd "$PROJECT_HOME/native" && cargo build --release --target "$OTHER_DARWIN_TARGET")
+# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml),
+# not the per-crate `native/target/`, so build from the repo root.
+(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni)
+(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni --target "$OTHER_DARWIN_TARGET")
mkdir -p "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR"
-cp "$PROJECT_HOME/native/target/release/libdatafusion_jni.dylib" \
+cp "$PROJECT_HOME/rust-target/release/libdatafusion_jni.dylib" \
"$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR/"
mkdir -p "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR"
-cp "$PROJECT_HOME/native/target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \
+cp "$PROJECT_HOME/rust-target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \
"$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR/"
echo "Installing JAR into local Maven repo"
diff --git a/dev/release/datafusion-java-rm/build-native-libs.sh b/dev/release/datafusion-java-rm/build-native-libs.sh
index 5f273cc..79f8ae0 100755
--- a/dev/release/datafusion-java-rm/build-native-libs.sh
+++ b/dev/release/datafusion-java-rm/build-native-libs.sh
@@ -38,8 +38,9 @@ git clone "$REPO" datafusion-java
cd datafusion-java
git checkout "$BRANCH"
-cd native
-cargo build --release
+# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml),
+# not the per-crate `native/target/`, so build from the repo root.
+cargo build --release -p datafusion-jni
-echo "Built $(pwd)/target/release/libdatafusion_jni.so"
-ls -l target/release/libdatafusion_jni.so
+echo "Built $(pwd)/rust-target/release/libdatafusion_jni.so"
+ls -l rust-target/release/libdatafusion_jni.so
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 81d83e8..3dbd90f 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -7,7 +7,7 @@
.mvn/wrapper/maven-wrapper.properties
mvnw
mvnw.cmd
-native/Cargo.lock
+Cargo.lock
dev/release/rat_exclude_files.txt
docs/source/_static/**
docs/source/conf.py
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index e486adc..c7767bf 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -150,7 +150,8 @@ test_source_distribution() {
# raises on any formatting errors
rustup component add rustfmt
- (cd native && cargo fmt --all -- --check)
+ # Workspace-wide: covers native, native-common, and any future members.
+ cargo fmt --all -- --check
# build native + JVM and run the full test suite
make test
diff --git a/docs/datafusion-spark-design.md b/docs/datafusion-spark-design.md
new file mode 100644
index 0000000..1019f3b
--- /dev/null
+++ b/docs/datafusion-spark-design.md
@@ -0,0 +1,304 @@
+# DataFusion-backed Spark DataSource: design
+
+## Goal
+
+Let Spark read from a DataFusion `TableProvider` as a native `DataSourceV2`,
+with the native boundary placed at the **Arrow C Data / C Stream interface and
+plain C types** — not at handwritten JNI per operation.
+
+## Origin
+
+On [PR #104](https://github.com/apache/datafusion-java/pull/104), Dewey
+Dunnington (@paleolimbot) reviewed an earlier stack (PR #103) whose cdylib
+exported JNI entry points directly, and argued for a cleaner shape:
+
+> build a cdylib that exports entrypoints that just use the Arrow C Data/Stream
+> interface and C types. That also has broader applicability to non-Java (i.e.,
+> can live in datafusion proper and get eyes/reviews from a wider audience).
+
+This design follows that: the reusable artifact is a **plain-C scan ABI** over
+Arrow C types; JNI is a thin, separable adapter; the same ABI is callable from
+Python/Go/Rust/FFM. It takes "Approach A": the providers we ship are compiled
+into the cdylib and selected by name, rather than imported over `datafusion-ffi`.
+
+## Principle: two planes, both zero-copy
+
+| Plane | Carries | Crosses via |
+| --- | --- | --- |
+| **Data** | Arrow record batches | Arrow C Stream (`FFI_ArrowArrayStream`) → arrow-java import → Spark `ArrowColumnVector` |
+| **Control** | provider name, config, pushdown, partition index | plain-C calls passing `(ptr, len)` and `long` addresses |
+
+No Arrow data is ever marshaled through JNI. Batches flow through the Arrow C
+Data interface, which arrow-java and arrow-rs already speak; the JVM gets real
+Arrow vectors and hands them to Spark with no per-cell copy.
+
+## Architecture
+
+```
+ spark.read.format("datafusion").option("path", ...).load()
+ │
+ ▼ datafusion-spark (Maven module, Java, Spark 4.0)
+ │ TableProvider → Table → ScanBuilder (projection / filter / limit pushdown)
+ │ → Scan/Batch → InputPartition[] (serializable: config + request bytes + index)
+ │ → PartitionReaderFactory → ColumnarPartitionReader
+ │
+ ▼ core: org.apache.datafusion.scan.DatafusionScan (JVM scan API)
+ │ + NativeScan (6 JNI methods) ──loads──► libdatafusion_scan_jni
+ │
+ ▼ native-jni: datafusion-scan-jni (cdylib) ← thin JVM adapter
+ │ Java_…_NativeScan_* → calls the scan core; writes FFI_ArrowArrayStream
+ │ into the address arrow-java allocated
+ │
+ ▼ native-ffi: datafusion-scan-ffi (cdylib + rlib) ← the reusable plain-C ABI
+ │ df_scan_* (extern "C") → scan core → registered provider builder
+ │ data plane: FFI_ArrowArrayStream (arrow-rs)
+ │
+ ▼ DataFusion: TableProvider (e.g. ListingTable) reads the source
+```
+
+Non-Java consumers (Python/Go/Rust/FFM) bind `df_scan_*` directly and skip the
+JNI and Spark layers entirely.
+
+## Components
+
+| Path | Crate / module | Role |
+| --- | --- | --- |
+| `native-ffi/` | `datafusion-scan-ffi` (cdylib + rlib) | The plain-C scan ABI; scan core; provider registry; demo + `datafusion.listing` providers |
+| `native-jni/` | `datafusion-scan-jni` (cdylib) | Thin JNI shim over the scan core |
+| `core/.../scan/` | part of `datafusion-java` | `NativeScan` (native decls), `ScanNativeLoader`, `DatafusionScan` (JVM API) |
+| `spark/` | `datafusion-spark` (Java) | The Spark `DataSourceV2` connector |
+| `proto/` | shared | `scan_config.proto`, `scan_request.proto` |
+
+## The plain-C ABI (`native-ffi/include/datafusion_scan.h`)
+
+```c
+uint64_t df_scan_abi_version(void);
+void df_error_free(char* err);
+
+int32_t df_scan_schema(DfStr provider, DfBytes options, DfBytes partition,
+ struct ArrowSchema* out_schema, char** out_err);
+int32_t df_scan_create(DfStr provider, DfBytes options, DfBytes partition,
+ int32_t target_partitions, int32_t batch_size, int64_t limit,
+ const DfKeyValue* config_overrides, size_t config_overrides_len,
+ const DfStr* projection, size_t projection_len,
+ const DfBytes* filters, size_t filters_len,
+ DfScanHandle** out_handle, char** out_err);
+int32_t df_scan_partition_count(const DfScanHandle*, int32_t* out_count, char** out_err);
+int32_t df_scan_execute_partition(const DfScanHandle*, int32_t partition,
+ struct ArrowArrayStream* out_stream, char** out_err);
+int32_t df_scan_execute(const DfScanHandle*, struct ArrowArrayStream* out_stream, char** out_err);
+void df_scan_close(DfScanHandle*);
+```
+
+Conventions: every fallible call returns a `DfStatus` (`0` for success, nonzero
+for failure) and on failure writes a malloc'd message to `*out_err` (freed by
+`df_error_free`). The only "rich" types crossing are the standard Arrow C structs
+`ArrowSchema` / `ArrowArrayStream`. Each call is wrapped in `catch_unwind` so a
+Rust panic becomes a status code, never an unwind across the C boundary.
+
+Providers are registered by name (`register_provider`) and selected via the
+`provider` argument; the `options`/`partition` blobs are opaque to the ABI and
+decoded by the registered builder.
+
+## Wire formats (`proto/`)
+
+- **`ScanConfig`** — the `options` blob: `provider` name + a `source` oneof
+ (`ListingSource` reusing the per-format read-option messages, or a `custom`
+ bytes escape hatch). `ScanPartition` is the per-partition `partition` blob.
+- **`ScanRequest`** — the engine's pushdown: `projection` (column names),
+ `filters` (each a serialized `datafusion.LogicalExprNode`), `limit`,
+ `target_partitions`, `batch_size`, `config_overrides`.
+
+`ScanRequest` is decoded by the JNI shim and exploded into `df_scan_create`'s
+typed C arguments, rather than passed as one blob — keeping the C ABI typed and
+FFM-friendly. Filters reuse DataFusion's own `LogicalExprNode` proto, so the
+Java side generates builders and the Rust side decodes with the stock codec from
+the same `.proto` — and the encoding is shared with any future Comet path.
+
+## JNI shim (`native-jni` + `core/.../scan`)
+
+Six `Java_…NativeScan_*` methods: `providerSchema`, `createScan`,
+`partitionCount`, `executeStreamPartition`, `executeStream`, `closeScan`. Each
+marshals a `String` + `byte[]`s and `long` addresses; the data plane writes an
+`FFI_ArrowArrayStream` into the arrow-java-allocated struct. `DatafusionScan`
+wraps these and returns an `ArrowReader` via `Data.importArrayStream`, mirroring
+`core`'s existing `DataFrame#collect`.
+
+## Arrow version strategy (the key integration decision)
+
+`ArrowColumnVector` is zero-copy only if the vectors we hand it are the **same
+arrow-java classes** Spark loaded — i.e. one Arrow in the executor JVM. So the
+connector treats arrow-java as **`provided`**: the cluster supplies it, our
+stream import and Spark's `ArrowColumnVector` share it, and columnar works with
+whatever Arrow the deployment ships (within an API-compatible window of the
+compile baseline, currently Spark 4.0's Arrow 18.1).
+
+Consequences:
+
+- **`datafusion-java` (core) stays on Arrow 19** for standalone use; only its
+  transitive Arrow dependency is excluded in the Spark module. Core is not downgraded.
+- **The Rust side is unaffected.** The Arrow C Data interface is a stable spec,
+ independent of Arrow library version: `arrow-rs 58` producing an
+ `FFI_ArrowArrayStream` imports into arrow-java 18 or 19 alike. Verified by the
+ JVM round-trip test.
+
+## Spark DataSourceV2 mapping
+
+| Spark interface | Our class | Behaviour |
+| --- | --- | --- |
+| `TableProvider`, `DataSourceRegister` | `DatafusionTableProvider` | `"datafusion"` short name; `inferSchema` probes via `df_scan_schema` |
+| `Table`, `SupportsRead` | `DatafusionTable` | `BATCH_READ` capability |
+| `ScanBuilder` + `SupportsPushDown{RequiredColumns,Filters,Limit}` | `DatafusionScanBuilder` | encodes projection / filters / limit into `ScanRequest` |
+| `Scan`, `Batch` | `DatafusionScanImpl` | plans once on the driver for partition count |
+| `InputPartition` | `DatafusionInputPartition` | **serializable**: carries config + request bytes + index, never a native handle |
+| `PartitionReaderFactory` | `DatafusionPartitionReaderFactory` | columnar reads |
+| `PartitionReader` | `DatafusionColumnarPartitionReader` | wraps imported Arrow vectors in `ArrowColumnVector`, zero-copy |
+
+Helpers: `OptionsCodec` (Spark options → `ScanConfig`), `SchemaConverter` (Arrow
+schema → Spark `StructType`, using only our Arrow types), `SparkFilters` (Spark
+`Filter`s → `LogicalExprNode`: comparisons, `And`/`Or`/`Not`, `IsNull`/
+`IsNotNull` over primitive literals; anything else falls back to Spark).
+
+**Partition serialization constraint:** a native handle is meaningless in
+another executor process, so partitions carry only bytes + an index, and each
+executor rebuilds the provider and runs its own partition. A limited plan
+coalesces to one partition, so `pushLimit` can report the bound as fully
+handled.
+
+## Testing
+
+| Level | Where | Proves |
+| --- | --- | --- |
+| Rust ABI round-trip | `native-ffi/tests/roundtrip.rs` | `df_scan_*` + import the stream back via the Arrow C Stream interface; partition count; limit; error/status |
+| Rust proto | `native-ffi/tests/proto.rs` | `ScanConfig`/`ScanRequest` encode/decode incl. embedded read-options |
+| Rust listing | `native-ffi/tests/listing.rs` | real `ListingTable` over a CSV, schema inference, full scan |
+| JVM scan | `core/.../scan/DatafusionScanTest` | end-to-end Java → JNI → Arrow C Stream; schema, scan, projection, filter, limit (closes the arrow-rs 58 ↔ arrow-java 19 ABI question) |
+| Spark unit | `spark/.../DatafusionScanBuilderTest` | decodes the built `ScanRequest` to prove pushdown is actually encoded (isolated from Spark's own handling) |
+| Spark E2E | `spark/.../DatafusionSourceTest` | local `SparkSession` over `format("datafusion")`: schema, full scan, projection, filter, limit on Spark 4.0 columnar |
+
+## Decisions log
+
+- **Approach A over `datafusion-ffi` import.** `datafusion-ffi` already exposes
+ the whole `TableProvider`, but over stabby vtables + an async, poll-based
+ `FFI_RecordBatchStream` — not Java-consumable and not flat C. Compiling
+ providers in and exporting flat C is simpler and is exactly the shape Dewey
+ asked for. The async surface would only be needed to load *third-party*
+ provider cdylibs (a future option B).
+- **Plain C + thin JNI, not JNI-in-the-cdylib.** Keeps the reusable artifact
+ language-neutral and upstreamable; quarantines the JVM into a ~6-method shim.
+- **Row-based → columnar.** Shipped row-based first to decouple from Spark's
+ Arrow, then moved to columnar once the `provided`-Arrow strategy removed the
+ version clash. Columnar is zero-copy; row-based is gone.
+- **Spark 4.0 / Arrow 18.1 baseline, Java.** Java matches the rest of the stack;
+ Spark 4.0's Arrow (18.1) is close to ours and Java-17 native.
+
+## Status and gaps
+
+Built and green end to end: the plain-C ABI, the JNI shim, and a columnar Spark
+4.0 connector with projection / filter / limit pushdown.
+
+Not yet done:
+
+- **Multi-partition coverage.** The executor-rebuild path is wired but exercised
+ only at one partition (single CSV); a directory/Parquet test would cover N>1.
+- **Native library packaging.** The shim loads from `java.library.path`;
+ classpath bundling per OS/arch (as `core` does for `datafusion_jni`) is left
+ to release packaging.
+- **Format breadth.** CSV options are fully mapped; Parquet/Avro/Arrow use
+ defaults.
+- **External provider cdylibs (option B).** Loading third-party providers over
+ `datafusion-ffi`'s `ForeignTableProvider` is not implemented.
+
+## Alternative / companion front-end: ADBC
+
+A reviewer suggested exposing arbitrary DataFusion `TableProvider`s over
+[ADBC](https://arrow.apache.org/adbc/) (Arrow Database Connectivity) instead of —
+or alongside — this scan ABI. The two are not mutually exclusive: they are two
+front-ends over the same core, serving different consumers.
+
+### What this PR's work reuses
+
+The PR already cleaves at the right seam. Of the layers below, the two most
+valuable are front-end-agnostic:
+
+| Layer | ADBC reuse |
+| --- | --- |
+| Exec core (`scan.rs`, `reader.rs`, `runtime.rs`) — build provider → register on `SessionContext` → plan → `ExecutionPlan` → partition stream → `FFI_ArrowArrayStream` | **Direct reuse.** Already JVM-free and C-free. |
+| Provider registry (`registry.rs`) — register `TableProvider` by name, build on demand | **Direct reuse.** This *is* the "arbitrary providers" mechanism. |
+| `native-common` (errors, tokio handle); panic→status `catch_unwind` pattern | Reuse concept; ADBC has its own error struct. |
+| `df_scan_*` flat C ABI, proto pushdown (`ScanRequest` / `SparkFilters` / `LogicalExprNode`), JNI shim, `core/scan/*`, `spark/*` | **Not reused.** Scan-, JVM-, and Spark-specific. |
+
+`reader.rs`'s `StreamingReader` (DataFusion `SendableRecordBatchStream` →
+`ArrowArrayStream`) is exactly what ADBC's `AdbcStatementExecuteQuery` returns:
+the data plane is identical, and the cross-implementation Arrow C Stream question
+this PR already answered carries over unchanged.
+
+### What ADBC adds, and what it drops
+
+ADBC mandates a fixed, large C surface — `AdbcDatabase` / `AdbcConnection` /
+`AdbcStatement` lifecycle, option getters/setters, metadata calls, an
+`AdbcDriverInit` entry point. You do **not** hand-write that vtable: the official
+`adbc_core` Rust crate supplies `Database` / `Connection` / `Statement` traits
+plus an `export_driver!` macro that generates the C ABI. So the FFI layer becomes
+trait glue, not a second hand-written boundary.
+
+New work:
+
+- `adbc_core` dependency + three trait impls. `Database` holds config + registered
+ providers; `Connection` wraps a `SessionContext`; `Statement` holds SQL + bound
+ params and, on execute, runs `ctx.sql(q)` → physical plan → the existing
+ `StreamingReader`.
+- Catalog metadata methods (`GetObjects` / `GetTableSchema` / `GetTableTypes` /
+ `GetInfo`) → DataFusion `CatalogProvider` / `SchemaProvider` introspection.
+- ADBC error / status mapping in place of `DfStatus`.
+- Optional: parameter binding / prepared statements; `ExecutePartitions` (maps
+ cleanly onto the existing plan-partition logic); ingest/write (likely out of
+ scope).
+- Driver packaging (a manifest so `adbc_driver_manager` can load the library).
+
+Dropped relative to the Spark path: the protobuf pushdown machinery
+(`ScanRequest`, `SparkFilters`, `LogicalExprNode` encoding) is unneeded — ADBC
+clients send SQL and DataFusion's optimizer does pushdown internally — as are the
+JNI shim, `core/scan`, and the Spark module.
+
+### Suggested layout for both
+
+```
+native-common/ errors, tokio runtime [shared]
+native-exec-core/ provider registry + plan/exec [shared] ← lift scan.rs/reader.rs/registry.rs here
+ ├─ native-ffi/ df_scan_* flat C (+ JNI/Spark) [exists]
+ └─ native-adbc/ adbc_core trait impls [new]
+```
+
+One refactor on the existing side: lift `scan.rs` / `reader.rs` / `registry.rs`
+out of `native-ffi` into a shared `native-exec-core` crate that both front-ends
+depend on; `native-ffi` keeps only `abi.rs` + proto. Low churn — those modules
+are already free of C/JVM concerns by design.
+
+### Why keep both rather than collapse to one
+
+Different consumers. `df_scan_*` is a bespoke, scan-only ABI with **explicit**
+pushdown: every consumer hand-binds it, but it can carry Spark's pre-resolved
+predicates without a SQL round-trip. ADBC is a SQL-oriented **standard** ABI:
+bigger mandated surface, but the whole client ecosystem (Python
+`adbc_driver_manager`, R, Go, the JDBC↔ADBC bridge) comes for free.
+
+They are not redundant, because Spark's pre-resolved pushdown does not always
+re-serialize to a SQL string:
+
+- **Lossy but rescuable** (within current filter scope): float/double literals
+ (decimal-text render loses exact IEEE bits), `NaN`/`±Inf` (no SQL literal),
+ decimal precision/scale, binary/non-UTF8 literals, null-safe equality
+ (`<=>` → `IS NOT DISTINCT FROM`), identifier quoting/case. ADBC parameter
+ binding (`WHERE col = ?` with a typed bound value) closes most of the literal
+ cases.
+- **Structurally impossible**: pushdown whose value is not known at
+ statement-prepare time — dynamic partition pruning, runtime/bloom filters from
+ joins — cannot be a static SQL string, and binding does not help because the
+ value arrives mid-execution. This PR pushes none of these yet, but it is the
+ reason a typed-`Expr` scan ABI is not merely a convenience over SQL: it is the
+ only path that can carry runtime filters at all.
+
+So the recommendation is a shared `native-exec-core` with two thin front-ends:
+ADBC for SQL clients across the Arrow ecosystem, the flat-C scan ABI for
+embedders (Spark today) that push pre-resolved or runtime predicates.
diff --git a/docs/source/contributor-guide/development.md b/docs/source/contributor-guide/development.md
index 984d77c..61d4fb0 100644
--- a/docs/source/contributor-guide/development.md
+++ b/docs/source/contributor-guide/development.md
@@ -42,7 +42,7 @@ This builds the native Rust crate and runs the JUnit tests. The steps can
be run individually:
```sh
-cd native && cargo build
+cargo build --workspace
./mvnw test
```
@@ -74,6 +74,11 @@ disk space.
The repository is a multi-module Maven build:
+- `Cargo.toml` — Rust workspace root declaring the crate members
+ (`native`, `native-common`) and `[workspace.dependencies]` that pin
+ shared versions in one place. Cargo writes artifacts to `rust-target/`
+ (overridden in `.cargo/config.toml`) so `mvn clean` at the repo root does
+ not nuke the Rust build cache.
- `pom.xml` — parent POM declaring the `core` and `examples` modules and
shared plugin/dependency versions.
- `core/` — `datafusion-java` library module (Java sources, tests, and
@@ -81,7 +86,10 @@ The repository is a multi-module Maven build:
- `examples/` — `datafusion-java-examples` module containing runnable
examples that depend on the library; built alongside the library so they
cannot fall out of sync with the API.
-- `native/` — Rust crate (JNI + Arrow C Data Interface).
+- `native/` — `datafusion-jni` Rust crate (JNI + Arrow C Data Interface).
+- `native-common/` — `datafusion-jni-common` Rust crate: JNI plumbing
+ shared across native crates (error→exception mapping, the per-cdylib
+ Tokio runtime singleton, the async-stream→`FFI_ArrowArrayStream` bridge).
- `proto/` — Protobuf definitions shared between Java and Rust.
- `Makefile` — top-level build orchestration (`make test`, `make format`,
`make tpch-data`).
diff --git a/docs/source/contributor-guide/updating-datafusion-version.md b/docs/source/contributor-guide/updating-datafusion-version.md
index 56d50dc..6e3b90b 100644
--- a/docs/source/contributor-guide/updating-datafusion-version.md
+++ b/docs/source/contributor-guide/updating-datafusion-version.md
@@ -21,7 +21,9 @@ under the License.
Three things must move together when bumping DataFusion:
-1. `native/Cargo.toml` — the `datafusion` crate dependency.
+1. `Cargo.toml` (workspace root) — the `datafusion`, `datafusion-proto`,
+ `datafusion-spark`, and `datafusion-substrait` entries in
+ `[workspace.dependencies]`. Members inherit from there.
2. `pom.xml` — the DataFusion version Maven property. **Must equal
the Cargo version**; a mismatch means JVM-built protobuf plans won't
deserialize on the native side.
@@ -32,9 +34,9 @@ Three things must move together when bumping DataFusion:
## Recipe
```sh
-# 1. Bump the Cargo dep
-$EDITOR native/Cargo.toml # set datafusion = ""
-(cd native && cargo update -p datafusion)
+# 1. Bump the workspace dep
+$EDITOR Cargo.toml # set datafusion = "X.Y.Z" in [workspace.dependencies]
+cargo update -p datafusion
# 2. Bump the Maven property to match
$EDITOR pom.xml # set the DataFusion version property
diff --git a/native-common/Cargo.toml b/native-common/Cargo.toml
new file mode 100644
index 0000000..21a2296
--- /dev/null
+++ b/native-common/Cargo.toml
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-jni-common"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# Implementation detail of datafusion-java's native crates, not a standalone
+# crates.io library. Matches `publish = false` on the `datafusion-jni` crate.
+publish = false
+readme = "README.md"
+description = "Shared JNI plumbing for DataFusion Java native crates: error-to-exception mapping, the per-cdylib Tokio runtime singleton, and the async-stream-to-FFI_ArrowArrayStream bridge."
+
+[features]
+# `datafusion-jni` builds DataFusion with `avro`, which adds the
+# `DataFusionError::AvroError` variant our classifier maps to IoException.
+# Feature-forwarded so consumers that don't read Avro (the Spark helper)
+# don't pull the apache-avro stack into their cdylib.
+avro = ["datafusion/avro"]
+
+[dependencies]
+datafusion = { workspace = true }
+futures = { workspace = true }
+jni = { workspace = true }
+tokio = { workspace = true }
diff --git a/native-common/README.md b/native-common/README.md
new file mode 100644
index 0000000..aadf877
--- /dev/null
+++ b/native-common/README.md
@@ -0,0 +1,37 @@
+
+
+# datafusion-jni-common
+
+Shared JNI plumbing for the [Apache DataFusion Java](https://github.com/apache/datafusion-java)
+native crates. It holds the pieces every DataFusion-backed `cdylib` loaded into a
+JVM needs, factored out so they live in one place.
+
+## Linking model
+
+Each consuming `cdylib` statically links its own copy of this crate, so the
+runtime singleton is per-library, not per-process. Nothing here is exported with
+`#[no_mangle]`, so linking it into several `cdylib`s loaded in one JVM cannot
+collide.
+
+## Status
+
+This crate is an implementation detail of Apache DataFusion Java. Its API may
+change between releases to track the needs of the native crates that depend on
+it.
diff --git a/native/src/errors.rs b/native-common/src/errors.rs
similarity index 95%
rename from native/src/errors.rs
rename to native-common/src/errors.rs
index d926544..f9dbb03 100644
--- a/native/src/errors.rs
+++ b/native-common/src/errors.rs
@@ -96,8 +96,11 @@ fn classify(err: &DataFusionError) -> &'static str {
}
DataFusionError::IoError(_)
| DataFusionError::ObjectStore(_)
- | DataFusionError::ParquetError(_)
- | DataFusionError::AvroError(_) => "org/apache/datafusion/IoException",
+ | DataFusionError::ParquetError(_) => "org/apache/datafusion/IoException",
+ // The AvroError variant only exists when DataFusion is built with its
+ // `avro` feature, forwarded by this crate's own `avro` feature.
+ #[cfg(feature = "avro")]
+ DataFusionError::AvroError(_) => "org/apache/datafusion/IoException",
// ArrowError is a 21-variant grab bag -- only some of those variants
// are actually IO-shaped. DivideByZero / ArithmeticOverflow / Compute
// / Cast / InvalidArgument / Memory etc. are execution-time failures
@@ -161,7 +164,10 @@ fn throw(env: &mut JNIEnv, class: &str, message: &str) {
let _ = env.throw_new(class, message);
}
-fn panic_message(panic: &Box) -> String {
+/// Best-effort extraction of a panic payload's message. `catch_unwind` hands
+/// back a `Box<dyn Any + Send>`; the payload is a `String` or `&str` for ordinary
+/// `panic!`/`unwrap` sites, anything else is opaque.
+pub fn panic_message(panic: &Box) -> String {
if let Some(s) = panic.downcast_ref::() {
s.clone()
} else if let Some(s) = panic.downcast_ref::<&str>() {
diff --git a/native-common/src/lib.rs b/native-common/src/lib.rs
new file mode 100644
index 0000000..ba47004
--- /dev/null
+++ b/native-common/src/lib.rs
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! JNI plumbing shared by this workspace's native crates (`datafusion-jni`
+//! and `datafusion-spark-bridge`, and through the latter every bridge
+//! cdylib): the error-to-Java-exception mapping, the per-cdylib Tokio
+//! runtime singleton, and the async-stream-to-`FFI_ArrowArrayStream`
+//! bridge.
+//!
+//! Each cdylib statically links its own copy of this rlib, so [`runtime`] is
+//! a per-cdylib singleton -- exactly the behaviour each crate had when this
+//! code lived inline. Nothing here is exported with `#[no_mangle]`, so
+//! linking this crate into several cdylibs loaded in one JVM cannot collide.
+
+pub mod errors;
+
+use std::panic::{catch_unwind, AssertUnwindSafe};
+use std::sync::OnceLock;
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::arrow::error::ArrowError;
+use datafusion::arrow::record_batch::RecordBatchReader;
+use datafusion::execution::SendableRecordBatchStream;
+use futures::StreamExt;
+use tokio::runtime::{Handle, Runtime};
+
+static RT: OnceLock<Runtime> = OnceLock::new();
+
+/// The cdylib-wide Tokio runtime, obtained with a no-op init hook.
+pub fn runtime() -> &'static Runtime {
+    runtime_with_init(|_| {})
+}
+
+/// Same singleton as [`runtime`], plus a hook. `init` runs exactly once, on
+/// the handle of the runtime being created; later calls (either entry point)
+/// return the existing runtime and drop `init` uncalled. `datafusion-jni`
+/// installs its runtime-metrics accumulator here to baseline at runtime start.
+pub fn runtime_with_init(init: impl FnOnce(&Handle)) -> &'static Runtime {
+    let create = || {
+        let fresh = Runtime::new().expect("failed to create Tokio runtime");
+        init(fresh.handle());
+        fresh
+    };
+    RT.get_or_init(create)
+}
+
+/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous
+/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore
+/// the Java `ArrowReader`) consumes. Each call to `next()` drives one
+/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the
+/// executor pipeline plus a single in-flight batch.
+pub struct StreamingReader {
+    pub schema: SchemaRef,
+    pub stream: SendableRecordBatchStream,
+}
+
+impl Iterator for StreamingReader {
+    type Item = Result<RecordBatch, ArrowError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Arrow's C ABI invokes this iterator through FFI_ArrowArrayStream's
+        // vtable, outside the JNI handler's try_unwrap_or_throw guard. A panic
+        // here (buggy UDF, arrow cast that panics, runtime poison) would
+        // unwind across C/FFI -- undefined behaviour. Catch it and surface as
+        // an ArrowError so the Java side sees a normal exception instead.
+        let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
+        match next {
+            Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
+            Err(panic) => {
+                let msg = errors::panic_message(&panic);
+                Some(Err(ArrowError::ExternalError(
+                    format!("panic in DataFrame stream: {msg}").into(),
+                )))
+            }
+        }
+    }
+}
+
+impl RecordBatchReader for StreamingReader {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone()
+    }
+}
diff --git a/native-ffi/Cargo.toml b/native-ffi/Cargo.toml
new file mode 100644
index 0000000..cd97d2b
--- /dev/null
+++ b/native-ffi/Cargo.toml
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-scan-ffi"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# Not published yet; this is the in-tree home of the plain-C scan ABI while it
+# stabilizes. The intent is for this surface to eventually live in DataFusion
+# proper (it has no JVM/JNI dependency), so keep it free of anything
+# Java-specific.
+publish = false
+
+[lib]
+# `cdylib` -> the shippable plain-C shared library (`libdatafusion_scan_ffi`).
+# `rlib` -> lets a downstream cdylib statically link this crate, register
+# its own providers, and re-export the `df_scan_*` symbols; also
+# gives `cargo test` a Rust harness that round-trips the ABI with
+# no JVM in sight.
+crate-type = ["cdylib", "rlib"]
+
+[features]
+# A built-in in-memory provider builder registered under `datafusion.memory`,
+# used by the round-trip tests and handy as a reference builder. Off by default
+# so a production cdylib only carries the providers it registers itself.
+demo-providers = []
+
+[dependencies]
+# The arrow C Data / C Stream interface types are the entire data plane of this
+# ABI. `ffi` pulls in both `arrow::ffi` (FFI_ArrowSchema/Array) and
+# `arrow::ffi_stream` (FFI_ArrowArrayStream). Same crate+version DataFusion
+# links, so the types unify.
+arrow = { workspace = true }
+# `avro` enables AvroFormat for the listing provider; parquet/csv/json/arrow
+# formats are on by default.
+datafusion = { workspace = true, features = ["avro"] }
+# Pushed filters arrive as serialized `datafusion.LogicalExprNode` protobufs --
+# the same vocabulary `datafusion-ffi` already uses, so the encoder is shared
+# with any future Comet path.
+datafusion-proto = { workspace = true }
+futures = { workspace = true }
+prost = { workspace = true }
+tokio = { workspace = true }
+
+[dev-dependencies]
+# Round-trip tests import the produced FFI_ArrowArrayStream back into Rust via
+# the same C Stream interface a Java/Python/Go consumer would use.
+datafusion-scan-ffi = { path = ".", features = ["demo-providers"] }
+
+[build-dependencies]
+# Compiles scan_config.proto / scan_request.proto (and the per-format read
+# option messages they embed) into Rust so provider builders can decode the
+# `options` blob. Mirrors `native/build.rs`.
+prost-build = { workspace = true }
+protoc-bin-vendored = { workspace = true }
diff --git a/native-ffi/build.rs b/native-ffi/build.rs
new file mode 100644
index 0000000..a1be583
--- /dev/null
+++ b/native-ffi/build.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+fn main() {
+    // Every proto transitively imported by scan_config.proto must be listed, or
+    // the generated `ScanConfig` would be missing some of its field types.
+    const PROTOS: &[&str] = &[
+        "../proto/scan_config.proto",
+        "../proto/scan_request.proto",
+        "../proto/file_compression_type.proto",
+        "../proto/csv_read_options.proto",
+        "../proto/json_read_options.proto",
+        "../proto/parquet_read_options.proto",
+        "../proto/avro_read_options.proto",
+        "../proto/arrow_read_options.proto",
+    ];
+    for p in PROTOS {
+        println!("cargo:rerun-if-changed={p}");
+    }
+    // Honor a caller-provided PROTOC, else fall back to the vendored binary
+    // (matching `native/build.rs`); rebuild when the caller's choice changes.
+    println!("cargo:rerun-if-env-changed=PROTOC");
+    if std::env::var_os("PROTOC").is_none() {
+        let protoc = protoc_bin_vendored::protoc_bin_path().expect("vendored protoc not available");
+        std::env::set_var("PROTOC", protoc);
+    }
+    prost_build::compile_protos(PROTOS, &["../proto"]).expect("failed to compile protos");
+}
diff --git a/native-ffi/include/datafusion_scan.h b/native-ffi/include/datafusion_scan.h
new file mode 100644
index 0000000..afa6a2e
--- /dev/null
+++ b/native-ffi/include/datafusion_scan.h
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Plain-C scan ABI over the Arrow C Data / C Stream interface.
+//
+// The only "rich" types crossing this boundary are the standard Arrow C
+// structs `ArrowSchema` and `ArrowArrayStream` (from Arrow's abi.h), which any
+// Arrow implementation can produce/consume. Everything else is C primitives
+// and borrowed (ptr, len) views. No JVM/JNI types appear here, by design.
+
+#ifndef DATAFUSION_SCAN_H
+#define DATAFUSION_SCAN_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arrow/c/abi.h" // struct ArrowSchema, struct ArrowArrayStream
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// --- Status codes ----------------------------------------------------------
+// 0 on success; nonzero classifies the failure. On error the call also writes
+// a library-allocated, NUL-terminated message to *out_err (release it only with df_error_free, never free()).
+typedef enum {
+ DF_OK = 0,
+ DF_INVALID_ARGUMENT = 1,
+ DF_UNKNOWN_PROVIDER = 2,
+ DF_PROVIDER_BUILD = 3,
+ DF_PLANNING = 4,
+ DF_EXECUTION = 5,
+ DF_PANIC = 6,
+ DF_INTERNAL = 7
+} DfStatus;
+
+// --- Borrowed input views (caller owns the memory) -------------------------
+typedef struct {
+ const uint8_t* ptr; // UTF-8, not NUL-terminated; may be null if len == 0
+ size_t len;
+} DfStr;
+
+typedef struct {
+ const uint8_t* ptr; // may be null if len == 0
+ size_t len;
+} DfBytes;
+
+typedef struct {
+ DfStr key;
+ DfStr value;
+} DfKeyValue;
+
+// Opaque planned-scan handle.
+typedef struct DfScanHandle DfScanHandle;
+
+// --- Lifecycle / versioning ------------------------------------------------
+
+// ABI major version; compare before any other call.
+uint64_t df_scan_abi_version(void);
+
+// Free a message previously written to an out_err argument (null-safe).
+void df_error_free(char* err);
+
+// --- Scan API --------------------------------------------------------------
+
+// Probe a provider's output schema into the caller-allocated out_schema.
+int32_t df_scan_schema(DfStr provider, DfBytes options, DfBytes partition,
+ struct ArrowSchema* out_schema, char** out_err);
+
+// Plan a scan. On success writes an owned handle to *out_handle (release with
+// df_scan_close). projection is an array of column-name DfStr (empty = all);
+// filters is an array of serialized datafusion.LogicalExprNode DfBytes;
+// target_partitions / batch_size <= 0 keep DataFusion defaults; limit < 0 means
+// no row limit.
+int32_t df_scan_create(DfStr provider, DfBytes options, DfBytes partition,
+ int32_t target_partitions, int32_t batch_size, int64_t limit,
+ const DfKeyValue* config_overrides, size_t config_overrides_len,
+ const DfStr* projection, size_t projection_len,
+ const DfBytes* filters, size_t filters_len,
+ DfScanHandle** out_handle, char** out_err);
+
+// Output partition count of the planned scan.
+int32_t df_scan_partition_count(const DfScanHandle* handle, int32_t* out_count,
+ char** out_err);
+
+// Execute one partition into the caller-allocated Arrow C Stream.
+int32_t df_scan_execute_partition(const DfScanHandle* handle, int32_t partition,
+ struct ArrowArrayStream* out_stream, char** out_err);
+
+// Execute the whole plan as a single coalesced Arrow C Stream.
+int32_t df_scan_execute(const DfScanHandle* handle,
+ struct ArrowArrayStream* out_stream, char** out_err);
+
+// Drop a planned scan (null-safe). Must not race an in-flight execute on the
+// same handle.
+void df_scan_close(DfScanHandle* handle);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // DATAFUSION_SCAN_H
diff --git a/native-ffi/src/abi.rs b/native-ffi/src/abi.rs
new file mode 100644
index 0000000..f037ad5
--- /dev/null
+++ b/native-ffi/src/abi.rs
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The plain-C front door: `extern "C"` entry points over C and Arrow C types.
+//!
+//! No `JNIEnv`, no JVM types, no name mangling -- the exported symbols are
+//! `df_scan_*` / `df_error_*` and the only "rich" types that cross are the
+//! standard Arrow C Data (`ArrowSchema`) and C Stream (`ArrowArrayStream`)
+//! structs. A Java consumer reaches these through a ~2-method JNI shim or the
+//! JDK 22+ FFM API; Python/Go/R/Rust reach them directly.
+//!
+//! Convention: every fallible call returns `0` on success and a nonzero
+//! [`DfStatus`](crate::error::DfStatus) on failure, writing a heap-allocated
+//! message to `*out_err` (released only via [`df_error_free`]). Each is wrapped
+//! in `catch_unwind` so a Rust panic becomes [`DfStatus::Panic`] instead of
+//! unwinding across the C boundary (UB).
+
+use std::ffi::c_char;
+use std::os::raw::c_int;
+use std::panic::{catch_unwind, AssertUnwindSafe};
+
+use datafusion::arrow::ffi::FFI_ArrowSchema;
+use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream;
+
+use crate::error::{finish, report, DfStatus, ScanError, ScanResult};
+use crate::ffi_types::{array, DfBytes, DfKeyValue, DfStr};
+use crate::reader::panic_message;
+use crate::scan::{self, ScanHandle, ScanRequest};
+
+/// Opaque handle to a planned scan. Boxed and handed out by [`df_scan_create`],
+/// reclaimed by [`df_scan_close`]. Never dereferenced by the consumer.
+pub struct DfScanHandle {
+ inner: ScanHandle, // the planned scan that the `df_scan_*` calls delegate to
+}
+
+/// Execute `body` behind a panic barrier.
+///
+/// A normal result is passed straight to [`finish`]; a caught panic is
+/// reported through [`report`] with [`DfStatus::Panic`].
+///
+/// # Safety
+/// `out_err` must be null or a writable `*mut *mut c_char`.
+unsafe fn guard(out_err: *mut *mut c_char, body: impl FnOnce() -> ScanResult<()>) -> c_int {
+    let payload = match catch_unwind(AssertUnwindSafe(body)) {
+        Ok(outcome) => return finish(out_err, outcome),
+        Err(payload) => payload,
+    };
+    // Unwinding across the C boundary is UB, so the panic becomes an error code.
+    let text = format!("panic in datafusion-scan-ffi: {}", panic_message(&payload));
+    report(out_err, ScanError::new(DfStatus::Panic, text))
+}
+
+/// Major version of the ABI, i.e. [`crate::ABI_VERSION`]. A consumer compares
+/// this against the value it was compiled for before calling anything else.
+#[no_mangle]
+pub extern "C" fn df_scan_abi_version() -> u64 {
+ crate::ABI_VERSION
+}
+
+/// Free an error string previously written to an `out_err` argument; a null
+/// `err` is a no-op. Ownership is reclaimed with `CString::from_raw`.
+///
+/// # Safety
+/// `err` must be null or a pointer previously returned through `out_err` by
+/// one of the `df_scan_*` calls, and must not be used afterwards.
+#[no_mangle]
+pub unsafe extern "C" fn df_error_free(err: *mut c_char) {
+ if !err.is_null() {
+ drop(std::ffi::CString::from_raw(err));
+ }
+}
+
+/// Resolve the named provider's output schema and export it as an Arrow C
+/// Schema into the caller-allocated `out_schema`.
+///
+/// # Safety
+/// All pointer args follow the documented `(ptr, len)` borrow contract;
+/// `out_schema` must point to a writable, uninitialized `ArrowSchema`.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_schema(
+    provider: DfStr,
+    options: DfBytes,
+    partition: DfBytes,
+    out_schema: *mut FFI_ArrowSchema,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        if out_schema.is_null() {
+            Err(ScanError::invalid_argument("out_schema is null"))
+        } else {
+            let resolved = scan::schema(provider.as_str()?, options.as_slice(), partition.as_slice())?;
+            // `write` stores the value without dropping the destination's old contents.
+            out_schema.write(FFI_ArrowSchema::try_from(resolved.as_ref())?);
+            Ok(())
+        }
+    })
+}
+
+/// Plan a scan. On success writes an owned [`DfScanHandle`] pointer to
+/// `*out_handle`; the caller must release it with [`df_scan_close`].
+///
+/// `config_overrides` is an array of [`DfKeyValue`] session settings,
+/// `projection` an array of column-name [`DfStr`]s (empty selects all), and
+/// `filters` an array of serialized `datafusion.LogicalExprNode` [`DfBytes`].
+/// A negative `limit` means no pushed row limit.
+///
+/// # Safety
+/// Array args follow the `(ptr, len)` borrow contract; `out_handle` must be a
+/// writable `*mut *mut DfScanHandle`.
+#[no_mangle]
+#[allow(clippy::too_many_arguments)]
+pub unsafe extern "C" fn df_scan_create(
+    provider: DfStr,
+    options: DfBytes,
+    partition: DfBytes,
+    target_partitions: c_int,
+    batch_size: c_int,
+    limit: i64,
+    config_overrides: *const DfKeyValue,
+    config_overrides_len: usize,
+    projection: *const DfStr,
+    projection_len: usize,
+    filters: *const DfBytes,
+    filters_len: usize,
+    out_handle: *mut *mut DfScanHandle,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        if out_handle.is_null() {
+            return Err(ScanError::invalid_argument("out_handle is null"));
+        }
+        let provider = provider.as_str()?;
+        // Collect from the validated slices: `array` maps a null pointer to an empty
+        // slice, so no buffer is ever pre-sized from an unchecked caller-supplied length.
+        let overrides = array(config_overrides, config_overrides_len)
+            .iter()
+            .map(|kv| Ok((kv.key.as_str()?.to_string(), kv.value.as_str()?.to_string())))
+            .collect::<ScanResult<Vec<(String, String)>>>()?;
+        let cols = array(projection, projection_len)
+            .iter()
+            .map(|col| col.as_str().map(str::to_string))
+            .collect::<ScanResult<Vec<String>>>()?;
+        let filter_bytes: Vec<Vec<u8>> = array(filters, filters_len)
+            .iter()
+            .map(|f| f.as_slice().to_vec())
+            .collect();
+        let handle = scan::create(ScanRequest {
+            provider,
+            options: options.as_slice(),
+            partition: partition.as_slice(),
+            target_partitions,
+            batch_size,
+            // Negative means "no limit"; saturate rather than truncate where usize < 64 bits.
+            limit: if limit < 0 { None } else { Some(usize::try_from(limit).unwrap_or(usize::MAX)) },
+            config_overrides: overrides,
+            projection: cols,
+            filters: filter_bytes,
+        })?;
+        // The boxed handle is owned by the caller until it is passed to `df_scan_close`.
+        out_handle.write(Box::into_raw(Box::new(DfScanHandle { inner: handle })));
+        Ok(())
+    })
+}
+
+/// Output partition count of the planned scan ([`DfStatus::Internal`] if it overflows a C `int`).
+///
+/// # Safety
+/// `handle` must be a live pointer from [`df_scan_create`]; `out_count` must be
+/// writable.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_partition_count(
+    handle: *const DfScanHandle,
+    out_count: *mut c_int,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        let h = handle.as_ref().ok_or_else(|| ScanError::invalid_argument("scan handle is null"))?;
+        if out_count.is_null() {
+            return Err(ScanError::invalid_argument("out_count is null"));
+        }
+        // A checked conversion: an `as` cast would silently wrap an oversized count.
+        let count = c_int::try_from(h.inner.partition_count())
+            .map_err(|_| ScanError::new(DfStatus::Internal, "partition count exceeds c_int range"))?;
+        out_count.write(count);
+        Ok(())
+    })
+}
+
+/// Execute one plan partition, exporting its reader as an
+/// `FFI_ArrowArrayStream` written into the caller-allocated `out_stream`. The
+/// consumer imports it with its Arrow C Stream importer (e.g. arrow-java `Data.importArrayStream`).
+///
+/// # Safety
+/// `handle` live; `out_stream` points to a writable, uninitialized
+/// `ArrowArrayStream`.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_execute_partition(
+    handle: *const DfScanHandle,
+    partition: c_int,
+    out_stream: *mut FFI_ArrowArrayStream,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        let scan = match handle.as_ref() {
+            Some(live) => &live.inner,
+            None => return Err(ScanError::invalid_argument("scan handle is null")),
+        };
+        if out_stream.is_null() {
+            return Err(ScanError::invalid_argument("out_stream is null"));
+        } else if partition < 0 {
+            return Err(ScanError::invalid_argument("partition index is negative"));
+        }
+        // Box the partition's reader so it can be wrapped as an Arrow C Stream.
+        let batches = Box::new(scan.execute_partition(partition as usize)?);
+        out_stream.write(FFI_ArrowArrayStream::new(batches));
+        Ok(())
+    })
+}
+
+/// Execute the whole plan, exported as one coalesced Arrow C Stream.
+///
+/// # Safety
+/// As [`df_scan_execute_partition`].
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_execute(
+    handle: *const DfScanHandle,
+    out_stream: *mut FFI_ArrowArrayStream,
+    out_err: *mut *mut c_char,
+) -> c_int {
+    guard(out_err, || {
+        let scan = match handle.as_ref() {
+            Some(live) => &live.inner,
+            None => return Err(ScanError::invalid_argument("scan handle is null")),
+        };
+        if out_stream.is_null() {
+            return Err(ScanError::invalid_argument("out_stream is null"));
+        }
+        let batches = Box::new(scan.execute_all()?);
+        out_stream.write(FFI_ArrowArrayStream::new(batches));
+        Ok(())
+    })
+}
+
+/// Drop a planned scan; a null `handle` is a no-op. The consumer must ensure
+/// this does not race an in-flight execute on the same handle.
+///
+/// # Safety
+/// `handle` must be null or a live pointer from [`df_scan_create`], not used afterwards.
+#[no_mangle]
+pub unsafe extern "C" fn df_scan_close(handle: *mut DfScanHandle) {
+    if handle.is_null() {
+        return;
+    }
+    drop(Box::from_raw(handle));
+}
diff --git a/native-ffi/src/demo.rs b/native-ffi/src/demo.rs
new file mode 100644
index 0000000..ca27f3b
--- /dev/null
+++ b/native-ffi/src/demo.rs
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! A reference in-memory provider builder, gated behind the `demo-providers`
+//! feature. Registered under `datafusion.memory`; the `options` bytes are
+//! ignored. Used by the round-trip tests and as a minimal example of what a
+//! real consumer's builder looks like.
+
+use std::sync::Arc;
+
+use datafusion::arrow::array::{Int64Array, StringArray};
+use datafusion::arrow::datatypes::{DataType, Field, Schema};
+use datafusion::arrow::record_batch::RecordBatch;
+use datafusion::catalog::TableProvider;
+use datafusion::datasource::MemTable;
+use datafusion::prelude::SessionContext;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+use crate::registry::register_provider;
+
+/// Registered builder name for the demo provider.
+pub const NAME: &str = "datafusion.memory";
+
+/// Register the demo provider's `build` function under [`NAME`]. Call once at startup.
+pub fn register() {
+ register_provider(NAME, build);
+}
+
+/// Two-column (`id: Int64`, `name: Utf8`), two-batch in-memory table across
+/// two partitions, so partition-count behavior is observable.
+fn build(
+    _ctx: &SessionContext,
+    _options: &[u8],
+    _partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>> {
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("id", DataType::Int64, false),
+        Field::new("name", DataType::Utf8, true),
+    ]));
+
+    let batch = |ids: Vec<i64>, names: Vec<&str>| -> ScanResult<RecordBatch> {
+        RecordBatch::try_new(
+            schema.clone(),
+            vec![
+                Arc::new(Int64Array::from(ids)),
+                Arc::new(StringArray::from(names)),
+            ],
+        )
+        .map_err(ScanError::from)
+    };
+
+    let p0 = batch(vec![1, 2, 3], vec!["a", "b", "c"])?;
+    let p1 = batch(vec![4, 5], vec!["d", "e"])?;
+
+    MemTable::try_new(schema, vec![vec![p0], vec![p1]])
+        .map(|t| Arc::new(t) as Arc<dyn TableProvider>)
+        .map_err(|e| ScanError::new(DfStatus::ProviderBuild, e.to_string()))
+}
diff --git a/native-ffi/src/error.rs b/native-ffi/src/error.rs
new file mode 100644
index 0000000..71d8164
--- /dev/null
+++ b/native-ffi/src/error.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Error model for the C ABI.
+//!
+//! Rust-internal code works with [`ScanError`]; the `extern "C"` layer turns it
+//! into an `i32` [`DfStatus`] return plus a heap-allocated message string. No
+//! Rust error type ever crosses the boundary -- only a code and UTF-8 bytes.
+
+use std::ffi::{c_char, CString};
+use std::os::raw::c_int;
+
+use datafusion::arrow::error::ArrowError;
+use datafusion::error::DataFusionError;
+
+/// Status codes returned by every fallible `df_scan_*` call. `0` is success;
+/// the rest classify the failure coarsely so a consumer can branch without
+/// parsing the message. Stable across an `ABI_VERSION`; the discriminants match the `DfStatus` enum in `include/datafusion_scan.h`.
+#[repr(i32)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum DfStatus {
+ Ok = 0,
+ /// A required pointer argument was null, or a length/index was invalid.
+ InvalidArgument = 1,
+ /// `provider` is not a registered builder name.
+ UnknownProvider = 2,
+ /// The provider builder itself failed.
+ ProviderBuild = 3,
+ /// Planning failed (projection, filter decode, physical planning).
+ Planning = 4,
+ /// Stream execution setup failed.
+ Execution = 5,
+ /// A Rust panic was caught at the boundary.
+ Panic = 6,
+ /// Anything not covered above.
+ Internal = 7,
+}
+
+/// Internal error carrying a status class and a human-readable message.
+#[derive(Debug)]
+pub struct ScanError {
+ pub status: DfStatus, // code that `report` returns to the C caller
+ pub message: String, // text that `report` copies into `*out_err`
+}
+
+impl ScanError {
+    /// Build an error from a status class and any string-like message
+    /// (`String`, `&str`, ...).
+    pub fn new(status: DfStatus, message: impl Into<String>) -> Self {
+        Self { status, message: message.into() }
+    }
+
+    /// Shorthand for a [`DfStatus::InvalidArgument`] error.
+    pub fn invalid_argument(message: impl Into<String>) -> Self {
+        Self::new(DfStatus::InvalidArgument, message)
+    }
+}
+
+impl From<DataFusionError> for ScanError {
+    fn from(e: DataFusionError) -> Self {
+        Self::new(DfStatus::Planning, e.to_string()) // classified as a planning failure
+    }
+}
+
+impl From<ArrowError> for ScanError {
+    fn from(e: ArrowError) -> Self {
+        Self::new(DfStatus::Internal, e.to_string()) // classified as an internal failure
+    }
+}
+
+impl From<prost::DecodeError> for ScanError {
+    fn from(e: prost::DecodeError) -> Self {
+        Self::new(
+            DfStatus::Planning, // filter decoding is part of planning
+            format!("failed to decode pushed filter as LogicalExprNode: {e}"),
+        )
+    }
+}
+
+pub type ScanResult<T> = Result<T, ScanError>; // crate-wide result alias; used as `ScanResult<()>`, `ScanResult<&str>`
+
+/// Export `err` across the boundary: its message is copied into `*out_err` as
+/// a freshly allocated, NUL-terminated C string (the caller frees it via
+/// `df_error_free`) and its status is returned as a `c_int`. With a null
+/// `out_err` the message is dropped and only the code is returned.
+///
+/// # Safety
+/// `out_err` must be null or point to a writable `*mut c_char`.
+pub unsafe fn report(out_err: *mut *mut c_char, err: ScanError) -> c_int {
+    let ScanError { status, message } = err;
+    if out_err.is_null() {
+        return status as c_int;
+    }
+    // Interior NULs are not representable in a C string; swap them for U+FFFD.
+    let printable = message.replace('\0', "\u{fffd}");
+    // A failed conversion degrades to a null message instead of panicking.
+    *out_err = CString::new(printable).map_or(std::ptr::null_mut(), CString::into_raw);
+    status as c_int
+}
+
+/// Turn a `ScanResult<()>` into the status code handed back to C, routing
+/// any error through [`report`] (which fills `out_err`).
+///
+/// # Safety
+/// See [`report`].
+pub unsafe fn finish(out_err: *mut *mut c_char, result: ScanResult<()>) -> c_int {
+    if let Err(failure) = result {
+        return report(out_err, failure);
+    }
+    DfStatus::Ok as c_int
+}
diff --git a/native-ffi/src/ffi_types.rs b/native-ffi/src/ffi_types.rs
new file mode 100644
index 0000000..a892a62
--- /dev/null
+++ b/native-ffi/src/ffi_types.rs
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Borrowed C views passed *into* the ABI.
+//!
+//! These are non-owning `(ptr, len)` pairs: the caller owns the memory and
+//! keeps it valid for the duration of the call. Nothing here is allocated or
+//! freed by Rust. Using explicit `(ptr, len)` slices (rather than
+//! NUL-terminated strings) means the surface is FFM-friendly and binary-safe.
+
+use std::slice;
+
+use crate::error::{ScanError, ScanResult};
+
+/// A borrowed UTF-8 string slice. Not NUL-terminated. Field order and types mirror `DfStr` in `include/datafusion_scan.h`.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct DfStr {
+ pub ptr: *const u8, // start of the bytes; a null pointer is read as an empty string
+ pub len: usize, // length in bytes
+}
+
+/// A borrowed byte slice. Field order and types mirror `DfBytes` in `include/datafusion_scan.h`.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct DfBytes {
+ pub ptr: *const u8, // start of the bytes; a null pointer is read as an empty slice
+ pub len: usize, // length in bytes
+}
+
+/// A borrowed `(key, value)` UTF-8 pair, for session config overrides. Mirrors `DfKeyValue` in the C header.
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct DfKeyValue {
+ pub key: DfStr, // override name, decoded with `DfStr::as_str`
+ pub value: DfStr, // override value, decoded with `DfStr::as_str`
+}
+
+impl DfStr {
+    /// Borrow as `&str`; invalid UTF-8 yields an invalid-argument error.
+    ///
+    /// # Safety
+    /// `ptr` must be null or point to `len` valid bytes of UTF-8 that stay
+    /// alive for the borrow.
+    pub unsafe fn as_str(&self) -> ScanResult<&str> {
+        std::str::from_utf8(self.as_bytes()).map_err(|e| {
+            ScanError::invalid_argument(format!("argument is not valid UTF-8: {e}"))
+        })
+    }
+
+    /// Raw bytes of the view; a null pointer or zero length yields `&[]`.
+    ///
+    /// # Safety
+    /// See [`DfStr::as_str`].
+    pub unsafe fn as_bytes(&self) -> &[u8] {
+        array(self.ptr, self.len)
+    }
+}
+
+impl DfBytes {
+    /// Borrow the view as a byte slice; a null pointer or zero length yields
+    /// an empty slice.
+    ///
+    /// # Safety
+    /// `ptr` must be null or point to `len` valid bytes alive for the borrow.
+    pub unsafe fn as_slice(&self) -> &[u8] {
+        // Same null/zero-length handling as the `array` helper, so delegate to it.
+        array(self.ptr, self.len)
+    }
+}
+
+/// Reinterpret a `(ptr, len)` array argument as a slice; a null pointer or a
+/// zero length produces the empty slice.
+///
+/// # Safety
+/// `ptr` must be null or point to `len` valid `T` for the borrow.
+pub unsafe fn array<'a, T>(ptr: *const T, len: usize) -> &'a [T] {
+    match (ptr.is_null(), len) {
+        (true, _) | (false, 0) => &[],
+        (false, _) => slice::from_raw_parts(ptr, len),
+    }
+}
diff --git a/native-ffi/src/lib.rs b/native-ffi/src/lib.rs
new file mode 100644
index 0000000..0f83e74
--- /dev/null
+++ b/native-ffi/src/lib.rs
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! A plain-C scan ABI over the Arrow C Data / C Stream interface.
+//!
+//! This crate exposes a DataFusion [`TableProvider`](datafusion::catalog::TableProvider)
+//! scan as a set of `extern "C"` entry points that speak only C types and the
+//! Arrow C Data interface. There is **no JVM/JNI dependency**: the front door
+//! is callable from Java (via a thin JNI shim or the JDK 22+ FFM API), but also
+//! from Python (cffi/ctypes), Go (cgo), R, or another Rust crate. That is the
+//! property that lets the surface live close to DataFusion proper and get
+//! reviewed by a wider audience -- the request on
+//! .
+//!
+//! # Shape
+//!
+//! Providers are *compiled into* the final cdylib ("approach A"): a consumer
+//! links this crate as an `rlib`, [`register_provider`]s its builders by name,
+//! and the `df_scan_*` symbols are exported from the resulting shared library.
+//! The data plane never crosses as serialized batches -- each scanned
+//! partition is handed back as a standard `FFI_ArrowArrayStream` the consumer
+//! imports zero-copy.
+//!
+//! # The ABI
+//!
+//! See `include/datafusion_scan.h` for the C header. In brief:
+//!
+//! - [`abi::df_scan_schema`] -- probe the output schema (Arrow C Schema)
+//! - [`abi::df_scan_create`] -- plan a scan, returns an opaque handle
+//! - [`abi::df_scan_partition_count`] -- number of output partitions
+//! - [`abi::df_scan_execute_partition`] -- one partition -> Arrow C Stream
+//! - [`abi::df_scan_execute`] -- whole plan -> Arrow C Stream
+//! - [`abi::df_scan_close`] -- drop the handle
+//! - [`abi::df_error_free`] -- free an error string
+//! - [`abi::df_scan_abi_version`] -- ABI major version for compatibility
+//!
+//! Every fallible call returns `0` on success and a nonzero
+//! [`error::DfStatus`] code on failure, setting `*out_err` to a heap-allocated,
+//! NUL-terminated message the caller releases only with `df_error_free`.
+
+pub mod abi;
+pub mod error;
+pub mod ffi_types;
+pub mod listing;
+pub mod reader;
+pub mod registry;
+pub mod runtime;
+pub mod scan;
+
+/// Generated protobuf types for the scan config / request wire formats
+/// (`proto/scan_config.proto`, `proto/scan_request.proto`). The `ScanConfig`
+/// blob is decoded by provider builders; `ScanRequest` is the engine-side
+/// staging object exploded into the C call's typed arguments.
+pub mod proto {
+ include!(concat!(env!("OUT_DIR"), "/datafusion_java.rs"));
+}
+
+#[cfg(feature = "demo-providers")]
+pub mod demo;
+
+pub use registry::register_provider;
+
+/// Major version of this ABI. Bumped on any breaking change to a `df_scan_*`
+/// signature or to the meaning of its arguments. Consumers compare against the
+/// value they were built for via [`abi::df_scan_abi_version`].
+pub const ABI_VERSION: u64 = 1;
diff --git a/native-ffi/src/listing.rs b/native-ffi/src/listing.rs
new file mode 100644
index 0000000..5b8aed6
--- /dev/null
+++ b/native-ffi/src/listing.rs
@@ -0,0 +1,226 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! A real file-backed provider builder, registered under `datafusion.listing`.
+//!
+//! Decodes the [`ScanConfig`](crate::proto::ScanConfig) blob into a DataFusion
+//! [`ListingTable`] over one or more paths read with a single file format.
+//! Demonstrates a builder that needs the session context: when no explicit
+//! schema is supplied it infers one from the data (and the context's object
+//! store registry resolves the paths).
+//!
+//! Object stores for remote URIs (s3://, gs://, ...) must be registered on the
+//! context by the embedding cdylib before a scan runs; the default context
+//! resolves local paths out of the box.
+
+use std::io::Cursor;
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::{Schema, SchemaRef};
+use datafusion::arrow::ipc::reader::StreamReader;
+use datafusion::catalog::TableProvider;
+use datafusion::datasource::file_format::arrow::ArrowFormat;
+use datafusion::datasource::file_format::avro::AvroFormat;
+use datafusion::datasource::file_format::csv::CsvFormat;
+use datafusion::datasource::file_format::file_compression_type::FileCompressionType;
+use datafusion::datasource::file_format::json::JsonFormat;
+use datafusion::datasource::file_format::parquet::ParquetFormat;
+use datafusion::datasource::file_format::FileFormat;
+use datafusion::datasource::listing::{
+ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
+};
+use datafusion::prelude::SessionContext;
+use prost::Message;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+use crate::proto::{listing_source, scan_config, FileCompressionType as ProtoCompression};
+use crate::proto::{ListingSource, ScanConfig};
+use crate::registry::register_provider;
+use crate::runtime::handle;
+
+/// Registered builder name for the listing provider.
+pub const NAME: &str = "datafusion.listing";
+
+/// Register [`build`] under [`NAME`] in the provider registry. Call once at startup.
+pub fn register() {
+ register_provider(NAME, build);
+}
+
+/// Provider builder for `datafusion.listing`.
+///
+/// Decodes `options` as a [`ScanConfig`], requires a listing source with at
+/// least one path, and wraps those paths in a [`ListingTable`]. The schema is
+/// read from `schema_ipc` when present, otherwise inferred through `ctx`.
+/// `_partition` is unused.
+fn build(
+    ctx: &SessionContext,
+    options: &[u8],
+    _partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>> {
+    // Shorthand for the formatted errors below; same status as the literal ones.
+    let fail = |msg: String| ScanError::new(DfStatus::ProviderBuild, msg);
+
+    let config = ScanConfig::decode(options)
+        .map_err(|e| fail(format!("failed to decode ScanConfig: {e}")))?;
+
+    let listing = match config.source {
+        Some(scan_config::Source::Listing(l)) => l,
+        Some(scan_config::Source::Custom(_)) => {
+            return Err(ScanError::new(
+                DfStatus::ProviderBuild,
+                "datafusion.listing requires a listing source, got custom bytes",
+            ))
+        }
+        None => {
+            return Err(ScanError::new(
+                DfStatus::ProviderBuild,
+                "datafusion.listing requires a listing source, none set",
+            ))
+        }
+    };
+
+    if listing.paths.is_empty() {
+        return Err(ScanError::new(
+            DfStatus::ProviderBuild,
+            "listing source has no paths",
+        ));
+    }
+
+    let table_paths = listing
+        .paths
+        .iter()
+        .map(|p| {
+            ListingTableUrl::parse(p).map_err(|e| {
+                ScanError::new(DfStatus::ProviderBuild, format!("invalid path {p:?}: {e}"))
+            })
+        })
+        .collect::<ScanResult<Vec<_>>>()?;
+
+    let listing_options = listing_options(&listing)?;
+
+    let mut table_config =
+        ListingTableConfig::new_with_multi_paths(table_paths).with_listing_options(listing_options);
+
+    table_config = match &listing.schema_ipc {
+        Some(bytes) => table_config.with_schema(schema_from_ipc(bytes)?),
+        // No explicit schema: infer from the data, using the context's state
+        // (and thus its object store registry) to read it.
+        None => handle()
+            .block_on(table_config.infer_schema(&ctx.state()))
+            .map_err(|e| fail(format!("failed to infer listing schema: {e}")))?,
+    };
+
+    let table = ListingTable::try_new(table_config)
+        .map_err(|e| ScanError::new(DfStatus::ProviderBuild, e.to_string()))?;
+    Ok(Arc::new(table))
+}
+
+/// Translate the proto `format` oneof into DataFusion [`ListingOptions`]: the
+/// file format (unset option fields keep its defaults) plus the file extension
+/// to list. Errors if no format is set or a CSV byte option exceeds 255.
+fn listing_options(listing: &ListingSource) -> ScanResult<ListingOptions> {
+    use listing_source::Format;
+
+    let (format, file_ext): (Arc<dyn FileFormat>, &str) = match &listing.format {
+        Some(Format::Csv(c)) => {
+            let mut fmt = CsvFormat::default()
+                .with_has_header(c.has_header)
+                .with_delimiter(byte(c.delimiter, b',')?)
+                .with_quote(byte(c.quote, b'"')?)
+                .with_newlines_in_values(c.newlines_in_values.unwrap_or(false))
+                .with_file_compression_type(compression(c.file_compression_type));
+            if let Some(t) = c.terminator {
+                fmt = fmt.with_terminator(Some(byte(t, b'\n')?));
+            }
+            if let Some(e) = c.escape {
+                fmt = fmt.with_escape(Some(byte(e, b'\\')?));
+            }
+            if let Some(cm) = c.comment {
+                fmt = fmt.with_comment(Some(byte(cm, b'#')?));
+            }
+            (Arc::new(fmt), extension(&c.file_extension, ".csv"))
+        }
+        Some(Format::Json(j)) => {
+            let fmt = JsonFormat::default()
+                .with_file_compression_type(compression(j.file_compression_type));
+            (Arc::new(fmt), extension(&j.file_extension, ".json"))
+        }
+        Some(Format::Parquet(p)) => {
+            // Parquet read tuning (pruning / metadata hints) is applied through
+            // session config at scan time, not on the format here.
+            (
+                Arc::new(ParquetFormat::default()),
+                extension(&p.file_extension, ".parquet"),
+            )
+        }
+        Some(Format::Avro(a)) => (Arc::new(AvroFormat), extension(&a.file_extension, ".avro")),
+        Some(Format::Arrow(a)) => (
+            Arc::new(ArrowFormat),
+            extension(&a.file_extension, ".arrow"),
+        ),
+        None => {
+            return Err(ScanError::new(
+                DfStatus::ProviderBuild,
+                "listing source has no file format",
+            ))
+        }
+    };
+
+    Ok(ListingOptions::new(format).with_file_extension(file_ext.to_string()))
+}
+
+/// A single byte sent over the wire as a `uint32`. Falls back to `default` when
+/// the field is unset (0), and rejects values that do not fit in a byte.
+fn byte(value: u32, default: u8) -> ScanResult<u8> {
+    if value == 0 {
+        return Ok(default);
+    }
+    u8::try_from(value)
+        .map_err(|_| ScanError::invalid_argument(format!("byte option {value} exceeds 255")))
+}
+
+/// Return `configured`, or `default` when `configured` is the empty string.
+fn extension<'a>(configured: &'a str, default: &'a str) -> &'a str {
+    match configured {
+        "" => default,
+        explicit => explicit,
+    }
+}
+
+/// Wire enum value -> DataFusion compression; anything else reads as uncompressed.
+fn compression(value: i32) -> FileCompressionType {
+    match ProtoCompression::try_from(value).ok() {
+        Some(ProtoCompression::Gzip) => FileCompressionType::GZIP,
+        Some(ProtoCompression::Bzip2) => FileCompressionType::BZIP2,
+        Some(ProtoCompression::Xz) => FileCompressionType::XZ,
+        Some(ProtoCompression::Zstd) => FileCompressionType::ZSTD,
+        Some(_) | None => FileCompressionType::UNCOMPRESSED,
+    }
+}
+
+/// Read a `SchemaRef` from Arrow IPC stream bytes (a schema message, optionally
+/// followed by zero batches -- the shape `StreamWriter::finish` produces).
+fn schema_from_ipc(bytes: &[u8]) -> ScanResult<SchemaRef> {
+    let reader = StreamReader::try_new(Cursor::new(bytes), None).map_err(|e| {
+        ScanError::new(
+            DfStatus::ProviderBuild,
+            format!("failed to read schema_ipc: {e}"),
+        )
+    })?;
+    let schema: Schema = reader.schema().as_ref().clone();
+    Ok(Arc::new(schema))
+}
diff --git a/native-ffi/src/reader.rs b/native-ffi/src/reader.rs
new file mode 100644
index 0000000..445668e
--- /dev/null
+++ b/native-ffi/src/reader.rs
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Bridge from DataFusion's async stream to the synchronous
+//! [`RecordBatchReader`] that `FFI_ArrowArrayStream` pulls.
+
+use std::panic::{catch_unwind, AssertUnwindSafe};
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::arrow::error::ArrowError;
+use datafusion::arrow::record_batch::RecordBatchReader;
+use datafusion::execution::SendableRecordBatchStream;
+use futures::StreamExt;
+
+use crate::runtime::runtime;
+
+/// Wraps a [`SendableRecordBatchStream`] as a [`RecordBatchReader`]. Each
+/// `next()` drives one `block_on(stream.next())`, so memory stays bounded by
+/// the pipeline plus a single in-flight batch.
+pub struct StreamingReader {
+ pub schema: SchemaRef,
+ pub stream: SendableRecordBatchStream,
+}
+
+impl Iterator for StreamingReader {
+    type Item = Result<RecordBatch, ArrowError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Arrow's C Stream vtable calls this from the *consumer's* thread,
+        // outside any guard. Unwinding across the C boundary is UB, so catch a
+        // panic and surface it as an ordinary ArrowError stream error instead.
+        let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
+        match next {
+            Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
+            // Deref the Box: `&panic` would coerce the Box itself to `dyn Any`.
+            Err(panic) => Some(Err(ArrowError::ExternalError(
+                format!("panic in DataFusion stream: {}", panic_message(&*panic)).into(),
+            ))),
+        }
+    }
+}
+
+impl RecordBatchReader for StreamingReader {
+    fn schema(&self) -> SchemaRef {
+        SchemaRef::clone(&self.schema) // hands back another handle to the stored schema
+    }
+}
+
+/// Best-effort extraction of a panic payload's message. Also unwraps a payload
+/// that is itself the `Box<dyn Any + Send>` (what `&Box<..>` coerces to).
+pub fn panic_message(panic: &(dyn std::any::Any + Send)) -> String {
+    if let Some(boxed) = panic.downcast_ref::<Box<dyn std::any::Any + Send>>() {
+        return panic_message(&**boxed);
+    }
+    let text = panic.downcast_ref::<&str>().map(|s| (*s).to_string());
+    text.or_else(|| panic.downcast_ref::<String>().cloned())
+        .unwrap_or_else(|| "unknown panic".to_string())
+}
diff --git a/native-ffi/src/registry.rs b/native-ffi/src/registry.rs
new file mode 100644
index 0000000..bccfb0c
--- /dev/null
+++ b/native-ffi/src/registry.rs
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Provider builder registry.
+//!
+//! "Approach A" means the providers ship compiled into the final cdylib rather
+//! than being imported over an FFI. A consumer registers each builder by name
+//! at startup; the C ABI selects one by that name and hands it the opaque
+//! `options`/`partition` byte blobs it was given. The builder decodes those
+//! however it likes (protobuf, JSON, bincode) -- the ABI stays oblivious.
+
+use std::collections::HashMap;
+use std::sync::{Arc, RwLock};
+
+use datafusion::catalog::TableProvider;
+use datafusion::prelude::SessionContext;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+
+/// Builds a provider from caller-supplied bytes.
+///
+/// * `ctx` -- the scan's session context, already configured with the
+///   caller's tuning/overrides. A builder that must infer a schema or read an
+///   object store (e.g. a listing table) uses `ctx.state()` for that; simple
+///   in-memory providers ignore it.
+/// * `options` -- provider-level config (which table, paths, schema, ...).
+/// * `partition` -- optional per-partition slice descriptor; empty for a
+///   whole-table scan.
+///
+/// `options`/`partition` are opaque to the ABI; their encoding is a contract
+/// between the registrant and whoever fills the bytes on the other side of the
+/// boundary (the in-tree builders use [`crate::proto::ScanConfig`]).
+pub type ProviderBuilder = fn(
+    ctx: &SessionContext,
+    options: &[u8],
+    partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>>;
+
+/// Process-wide name -> builder map, created empty on first access.
+fn registry() -> &'static RwLock<HashMap<String, ProviderBuilder>> {
+    static REGISTRY: std::sync::OnceLock<RwLock<HashMap<String, ProviderBuilder>>> =
+        std::sync::OnceLock::new();
+    REGISTRY.get_or_init(|| RwLock::new(HashMap::new()))
+}
+
+/// Register `builder` under `name`, replacing any previous registration. Call
+/// once per provider at cdylib startup (e.g. from a `#[ctor]` or exported init
+/// function). A poisoned registry lock is recovered rather than panicking.
+pub fn register_provider(name: impl Into<String>, builder: ProviderBuilder) {
+    let name = name.into(); // convert first so caller code never runs under the lock
+    let mut providers = registry().write().unwrap_or_else(std::sync::PoisonError::into_inner);
+    providers.insert(name, builder);
+}
+
+/// Look up `name` and build a provider from the given bytes (`UnknownProvider` if absent).
+pub fn build_provider(
+    name: &str,
+    ctx: &SessionContext,
+    options: &[u8],
+    partition: &[u8],
+) -> ScanResult<Arc<dyn TableProvider>> {
+    let builder = {
+        let guard = registry().read().unwrap_or_else(std::sync::PoisonError::into_inner);
+        guard.get(name).copied()
+    };
+    match builder {
+        Some(b) => b(ctx, options, partition),
+        None => Err(ScanError::new(
+            DfStatus::UnknownProvider,
+            format!("no provider builder registered under name {name:?}"),
+        )),
+    }
+}
diff --git a/native-ffi/src/runtime.rs b/native-ffi/src/runtime.rs
new file mode 100644
index 0000000..87fe2e7
--- /dev/null
+++ b/native-ffi/src/runtime.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The cdylib-wide Tokio runtime.
+//!
+//! DataFusion planning and execution are async; this ABI is synchronous, so
+//! every call that awaits does so through this runtime. Statically linked into
+//! whatever cdylib embeds this crate, so it is a per-cdylib singleton -- two
+//! libraries loaded in one process get independent runtimes and cannot collide.
+//!
+//! This mirrors `datafusion-jni-common`'s runtime but is deliberately
+//! duplicated here so the C ABI carries no dependency on the JNI crate.
+
+use std::sync::OnceLock;
+
+use tokio::runtime::{Handle, Runtime};
+
+static RT: OnceLock<Runtime> = OnceLock::new(); // filled lazily by `runtime()`
+
+/// The shared multi-thread Tokio runtime, created on first use (panics if that fails).
+pub fn runtime() -> &'static Runtime {
+ RT.get_or_init(|| Runtime::new().expect("failed to create Tokio runtime"))
+}
+
+/// Handle to the shared [`runtime`] (creating it if needed), for `block_on` / `enter`.
+pub fn handle() -> &'static Handle {
+ runtime().handle()
+}
diff --git a/native-ffi/src/scan.rs b/native-ffi/src/scan.rs
new file mode 100644
index 0000000..4a668d0
--- /dev/null
+++ b/native-ffi/src/scan.rs
@@ -0,0 +1,183 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Planning and execution core, free of any C/JVM concerns.
+//!
+//! This is the JNI-free port of the logic in PR #103's `spark/bridge/src/scan.rs`:
+//! build the provider, register it on a private `SessionContext` with the
+//! caller-pinned config, apply the pruned projection and proto-encoded pushed
+//! filters, and plan once. The resulting [`ScanHandle`] then yields one
+//! independent stream per plan partition.
+//!
+//! Spark-specific type widening is intentionally **not** here: it is a consumer
+//! concern (apply a `WideningTableProvider` decorator inside the registered
+//! builder if you need it), so this core stays a faithful DataFusion scan.
+
+use std::sync::Arc;
+
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::dataframe::DataFrame;
+use datafusion::execution::TaskContext;
+use datafusion::physical_plan::{execute_stream, ExecutionPlan};
+use datafusion::prelude::{SessionConfig, SessionContext};
+use datafusion_proto::logical_plan::from_proto::parse_expr;
+use datafusion_proto::logical_plan::DefaultLogicalExtensionCodec;
+use datafusion_proto::protobuf::LogicalExprNode;
+use prost::Message;
+
+use crate::error::{DfStatus, ScanError, ScanResult};
+use crate::reader::StreamingReader;
+use crate::registry::build_provider;
+use crate::runtime::handle;
+
+/// Registration name of the provider on the scan's private context. Never
+/// surfaces in SQL (the plan is built through the DataFrame API), so no
+/// quoting/collision concern.
+const SCAN_TABLE_NAME: &str = "df_scan";
+
+/// Inputs to [`create`], decoded from the C arguments by the ABI layer.
+pub struct ScanRequest<'a> {
+    pub provider: &'a str,
+    pub options: &'a [u8],
+    pub partition: &'a [u8],
+    /// `<= 0` leaves the DataFusion default.
+    pub target_partitions: i32,
+    /// `<= 0` leaves the DataFusion default.
+    pub batch_size: i32,
+    pub config_overrides: Vec<(String, String)>,
+    /// Column names to project; empty selects all.
+    pub projection: Vec<String>,
+    /// Each entry is a serialized `datafusion.LogicalExprNode`.
+    pub filters: Vec<Vec<u8>>,
+    /// Optional row limit pushed into the scan. `None` means no limit.
+    pub limit: Option<usize>,
+}
+
+/// A planned scan. Holds the context alive for the plan's lifetime.
+pub struct ScanHandle {
+    _ctx: SessionContext,
+    plan: Arc<dyn ExecutionPlan>,
+    task_ctx: Arc<TaskContext>,
+}
+
+/// Build the provider via the registry and return its output schema, without
+/// planning. Mirrors #103's `provider_schema_ipc`, but returns the live
+/// `SchemaRef` (the ABI converts it to an Arrow C Schema). Uses a default
+/// context -- enough for schema inference against the default (local) object
+/// store; a provider needing custom stores should be built through [`create`].
+pub fn schema(provider: &str, options: &[u8], partition: &[u8]) -> ScanResult<SchemaRef> {
+    let ctx = SessionContext::new();
+    let provider = build_provider(provider, &ctx, options, partition)?;
+    Ok(provider.schema())
+}
+
+/// Build, register, project, filter, and plan exactly once.
+pub fn create(req: ScanRequest<'_>) -> ScanResult<ScanHandle> {
+    // Build the context first: a provider may need it (schema inference, object
+    // store access) at construction time.
+    let mut config = SessionConfig::new();
+    if req.target_partitions > 0 {
+        config = config.with_target_partitions(req.target_partitions as usize);
+    }
+    if req.batch_size > 0 {
+        config = config.with_batch_size(req.batch_size as usize);
+    }
+    for (key, value) in &req.config_overrides {
+        config.options_mut().set(key, value)?;
+    }
+
+    let ctx = SessionContext::new_with_config(config);
+    let provider = build_provider(req.provider, &ctx, req.options, req.partition)?;
+    ctx.register_table(SCAN_TABLE_NAME, provider)?;
+
+    let mut df: DataFrame = handle().block_on(ctx.table(SCAN_TABLE_NAME))?;
+    if !req.projection.is_empty() {
+        let refs: Vec<&str> = req.projection.iter().map(String::as_str).collect();
+        df = df.select_columns(&refs)?;
+    }
+    for bytes in &req.filters {
+        let node = LogicalExprNode::decode(bytes.as_slice())?;
+        // TaskContext implements FunctionRegistry; the default codec suffices
+        // for the column/literal/builtin expressions a predicate translator
+        // emits.
+        let registry = df.task_ctx();
+        let expr = parse_expr(&node, &registry, &DefaultLogicalExtensionCodec {})
+            .map_err(|e| ScanError::new(DfStatus::Planning, e.to_string()))?;
+        df = df.filter(expr)?;
+    }
+    if let Some(fetch) = req.limit {
+        df = df.limit(0, Some(fetch))?;
+    }
+
+    // task_ctx() borrows df; capture before create_physical_plan consumes it.
+    let task_ctx = Arc::new(df.task_ctx());
+    let plan = handle().block_on(df.create_physical_plan())?;
+
+    Ok(ScanHandle {
+        _ctx: ctx,
+        plan,
+        task_ctx,
+    })
+}
+
+impl ScanHandle {
+    /// Output partition count of the planned physical plan.
+    pub fn partition_count(&self) -> usize {
+        self.plan
+            .properties()
+            .output_partitioning()
+            .partition_count()
+    }
+
+    /// Open an independent reader over one plan partition. Concurrently
+    /// callable across partitions: `ExecutionPlan`/`TaskContext` are
+    /// `Send + Sync`, and each call only clones their `Arc`s.
+    pub fn execute_partition(&self, partition: usize) -> ScanResult<StreamingReader> {
+        let count = self.partition_count();
+        if partition >= count {
+            return Err(ScanError::new(
+                DfStatus::InvalidArgument,
+                format!("partition index {partition} out of range: plan has {count} partition(s)"),
+            ));
+        }
+        let plan = Arc::clone(&self.plan);
+        let task_ctx = Arc::clone(&self.task_ctx);
+        let schema: SchemaRef = plan.schema();
+
+        // execute() is synchronous but operators may tokio::spawn at
+        // execute()-time (RepartitionExec et al.), needing a runtime context.
+        let stream = {
+            let _guard = handle().enter();
+            plan.execute(partition, task_ctx)
+                .map_err(|e| ScanError::new(DfStatus::Execution, e.to_string()))?
+        };
+        Ok(StreamingReader { schema, stream })
+    }
+
+    /// Open one reader over the whole plan (all partitions coalesced).
+    pub fn execute_all(&self) -> ScanResult<StreamingReader> {
+        let plan = Arc::clone(&self.plan);
+        let task_ctx = Arc::clone(&self.task_ctx);
+        let schema: SchemaRef = plan.schema();
+        let stream = {
+            let _guard = handle().enter();
+            execute_stream(plan, task_ctx)
+                .map_err(|e| ScanError::new(DfStatus::Execution, e.to_string()))?
+        };
+        Ok(StreamingReader { schema, stream })
+    }
+}
diff --git a/native-ffi/tests/listing.rs b/native-ffi/tests/listing.rs
new file mode 100644
index 0000000..eac5e2e
--- /dev/null
+++ b/native-ffi/tests/listing.rs
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! End-to-end test of the `datafusion.listing` provider through the plain-C
+//! ABI: write a CSV, encode a ScanConfig pointing at it, scan it, and import
+//! the result back through the Arrow C Stream interface -- the path a foreign
+//! consumer takes. Exercises schema inference (no explicit schema supplied).
+
+use std::ffi::{c_char, CStr};
+use std::fs;
+use std::process;
+use std::ptr;
+
+use datafusion::arrow::array::Int64Array;
+use datafusion::arrow::ffi::FFI_ArrowSchema;
+use datafusion::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
+
+use datafusion_scan_ffi::abi::{
+ df_error_free, df_scan_close, df_scan_create, df_scan_execute, df_scan_partition_count,
+ df_scan_schema, DfScanHandle,
+};
+use datafusion_scan_ffi::ffi_types::{DfBytes, DfStr};
+use datafusion_scan_ffi::listing;
+use datafusion_scan_ffi::proto::{
+ listing_source, scan_config, CsvReadOptionsProto, ListingSource, ScanConfig,
+};
+use prost::Message;
+
+unsafe fn take_err(err: *mut c_char) -> Option {
+ if err.is_null() {
+ None
+ } else {
+ let s = CStr::from_ptr(err).to_string_lossy().into_owned();
+ df_error_free(err);
+ Some(s)
+ }
+}
+
+/// Write a CSV into a temp dir unique to this process and test thread; return (dir, file path).
+fn write_csv() -> (std::path::PathBuf, String) {
+    let tag = format!("df-scan-ffi-{}-{:?}", process::id(), std::thread::current().id());
+    let dir = std::env::temp_dir().join(tag);
+    fs::create_dir_all(&dir).expect("create temp dir");
+    fs::write(dir.join("data.csv"), "id,name\n1,a\n2,b\n3,c\n").expect("write csv");
+    (dir.clone(), dir.join("data.csv").to_string_lossy().into_owned())
+}
+
+/// Encode a ScanConfig for a CSV listing source over `path`.
+fn csv_config(path: &str) -> Vec<u8> {
+    ScanConfig {
+        provider: listing::NAME.to_string(),
+        source: Some(scan_config::Source::Listing(ListingSource {
+            paths: vec![path.to_string()],
+            schema_ipc: None,
+            format: Some(listing_source::Format::Csv(CsvReadOptionsProto {
+                has_header: true,
+                delimiter: b',' as u32,
+                quote: b'"' as u32,
+                file_extension: ".csv".to_string(),
+                ..Default::default()
+            })),
+        })),
+    }
+    .encode_to_vec()
+}
+
+fn provider() -> DfStr {
+ DfStr {
+ ptr: listing::NAME.as_ptr(),
+ len: listing::NAME.len(),
+ }
+}
+
+fn options(bytes: &[u8]) -> DfBytes {
+ DfBytes {
+ ptr: bytes.as_ptr(),
+ len: bytes.len(),
+ }
+}
+
+const EMPTY: DfBytes = DfBytes {
+ ptr: ptr::null(),
+ len: 0,
+};
+
+#[test]
+fn listing_csv_schema_is_inferred() {
+ listing::register();
+ let (_dir, path) = write_csv();
+ let cfg = csv_config(&path);
+
+ let mut schema = FFI_ArrowSchema::empty();
+ let mut err: *mut c_char = ptr::null_mut();
+ let status = unsafe { df_scan_schema(provider(), options(&cfg), EMPTY, &mut schema, &mut err) };
+ assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+
+ let schema =
+ datafusion::arrow::datatypes::Schema::try_from(&schema).expect("import FFI_ArrowSchema");
+ let names: Vec<_> = schema.fields().iter().map(|f| f.name().as_str()).collect();
+ assert_eq!(names, vec!["id", "name"]);
+}
+
+/// Plan a CSV scan, check the partition count, then drain the coalesced stream.
+#[test]
+fn listing_csv_scans_rows() {
+    listing::register();
+    let (_dir, path) = write_csv();
+    let cfg = csv_config(&path);
+
+    // Plan.
+    let mut handle: *mut DfScanHandle = ptr::null_mut();
+    let mut err: *mut c_char = ptr::null_mut();
+    let status = unsafe {
+        df_scan_create(
+            provider(),
+            options(&cfg),
+            EMPTY,
+            0,
+            0,
+            -1,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            &mut handle,
+            &mut err,
+        )
+    };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+    assert!(!handle.is_null());
+
+    // Partition count is reported. Route the status through `take_err` like
+    // the other calls so a failure prints (and frees) the native error text.
+    let mut count = 0i32;
+    let mut err2: *mut c_char = ptr::null_mut();
+    let status = unsafe { df_scan_partition_count(handle, &mut count, &mut err2) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err2) });
+    assert!(count >= 1, "expected at least one partition, got {count}");
+
+    // Execute the whole plan as one coalesced stream and sum `id`.
+    let mut stream = FFI_ArrowArrayStream::empty();
+    let mut err3: *mut c_char = ptr::null_mut();
+    let status = unsafe { df_scan_execute(handle, &mut stream, &mut err3) };
+    assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err3) });
+
+    let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.expect("import stream");
+    let mut total: i64 = 0;
+    let mut rows = 0usize;
+    for batch in reader {
+        let batch = batch.expect("batch");
+        rows += batch.num_rows();
+        let ids = batch
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .expect("id is Int64");
+        total += ids.values().iter().sum::<i64>();
+    }
+    assert_eq!(rows, 3);
+    assert_eq!(total, 1 + 2 + 3);
+
+    unsafe { df_scan_close(handle) };
+}
diff --git a/native-ffi/tests/proto.rs b/native-ffi/tests/proto.rs
new file mode 100644
index 0000000..ac668c7
--- /dev/null
+++ b/native-ffi/tests/proto.rs
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Confirms the generated scan-config / scan-request types encode and decode,
+//! including a per-format read-option message embedded through the source
+//! oneof -- i.e. the imports across `proto/*.proto` resolved at build time.
+
+use datafusion_scan_ffi::proto::{
+ listing_source, scan_config, CsvReadOptionsProto, ListingSource, ScanConfig, ScanRequest,
+};
+use prost::Message;
+
+#[test]
+fn scan_config_with_listing_source_roundtrips() {
+ let config = ScanConfig {
+ provider: "datafusion.listing".to_string(),
+ source: Some(scan_config::Source::Listing(ListingSource {
+ paths: vec!["s3://bucket/data/".to_string()],
+ schema_ipc: None,
+ format: Some(listing_source::Format::Csv(CsvReadOptionsProto {
+ has_header: true,
+ delimiter: b',' as u32,
+ quote: b'"' as u32,
+ file_extension: ".csv".to_string(),
+ ..Default::default()
+ })),
+ })),
+ };
+
+ let bytes = config.encode_to_vec();
+ let decoded = ScanConfig::decode(bytes.as_slice()).expect("decode ScanConfig");
+
+ assert_eq!(decoded.provider, "datafusion.listing");
+ match decoded.source {
+ Some(scan_config::Source::Listing(l)) => {
+ assert_eq!(l.paths, vec!["s3://bucket/data/".to_string()]);
+ match l.format {
+ Some(listing_source::Format::Csv(c)) => {
+ assert!(c.has_header);
+ assert_eq!(c.delimiter, b',' as u32);
+ }
+ other => panic!("expected CSV format, got {other:?}"),
+ }
+ }
+ other => panic!("expected listing source, got {other:?}"),
+ }
+}
+
+#[test]
+fn scan_request_roundtrips() {
+ let req = ScanRequest {
+ projection: vec!["id".to_string(), "name".to_string()],
+ filters: vec![vec![1, 2, 3], vec![4, 5]],
+ limit: Some(100),
+ target_partitions: 8,
+ batch_size: 0,
+ config_overrides: [(
+ "datafusion.execution.parquet.pushdown_filters".to_string(),
+ "true".to_string(),
+ )]
+ .into_iter()
+ .collect(),
+ };
+
+ let bytes = req.encode_to_vec();
+ let decoded = ScanRequest::decode(bytes.as_slice()).expect("decode ScanRequest");
+
+ assert_eq!(decoded.projection, vec!["id", "name"]);
+ assert_eq!(decoded.filters.len(), 2);
+ assert_eq!(decoded.limit, Some(100));
+ assert_eq!(decoded.target_partitions, 8);
+ assert_eq!(
+ decoded
+ .config_overrides
+ .get("datafusion.execution.parquet.pushdown_filters")
+ .map(String::as_str),
+ Some("true")
+ );
+}
diff --git a/native-ffi/tests/roundtrip.rs b/native-ffi/tests/roundtrip.rs
new file mode 100644
index 0000000..3ec6436
--- /dev/null
+++ b/native-ffi/tests/roundtrip.rs
@@ -0,0 +1,213 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Exercises the plain-C ABI exactly as a foreign consumer would: call the
+//! `df_scan_*` entry points with C structs, hand a caller-allocated
+//! `FFI_ArrowArrayStream` across the boundary, then import it back through the
+//! Arrow C Stream interface (`ArrowArrayStreamReader`) -- the Rust analogue of
+//! arrow-java's `Data.importArrayStream`. No JVM involved.
+
+use std::ffi::{c_char, CStr};
+use std::ptr;
+
+use datafusion::arrow::array::Int64Array;
+use datafusion::arrow::ffi::FFI_ArrowSchema;
+use datafusion::arrow::ffi_stream::{ArrowArrayStreamReader, FFI_ArrowArrayStream};
+
+use datafusion_scan_ffi::abi::{
+ df_error_free, df_scan_abi_version, df_scan_close, df_scan_create, df_scan_execute,
+ df_scan_execute_partition, df_scan_partition_count, df_scan_schema, DfScanHandle,
+};
+use datafusion_scan_ffi::ffi_types::{DfBytes, DfStr};
+use datafusion_scan_ffi::{demo, ABI_VERSION};
+
+fn provider() -> DfStr {
+    // Describe the demo provider's name constant as a pointer + length pair,
+    // which is the shape `DfStr` carries into the C entry points.
+    let (ptr, len) = (demo::NAME.as_ptr(), demo::NAME.len());
+    DfStr { ptr, len }
+}
+
+const EMPTY_BYTES: DfBytes = DfBytes {
+ ptr: ptr::null(), // no backing buffer
+ len: 0, // zero bytes: what these tests pass when they supply no payload
+};
+
+/// Copy out the error message behind `err` (if any) and free the original.
+unsafe fn take_err(err: *mut c_char) -> Option<String> {
+    if err.is_null() {
+        return None; // no error was reported
+    }
+    // Take an owned copy before handing the C string back to the library.
+    let message = CStr::from_ptr(err).to_string_lossy().into_owned();
+    df_error_free(err);
+    Some(message)
+}
+
+#[test]
+fn abi_version_matches() {
+ assert_eq!(df_scan_abi_version(), ABI_VERSION); // exported entry point vs. crate constant
+}
+
+#[test]
+fn schema_probe_returns_provider_schema() {
+    use datafusion::arrow::datatypes::Schema;
+    demo::register(); // the probe below looks the demo provider up by name
+    let mut ffi_schema = FFI_ArrowSchema::empty();
+    let mut err: *mut c_char = ptr::null_mut();
+    let rc = unsafe {
+        df_scan_schema(provider(), EMPTY_BYTES, EMPTY_BYTES, &mut ffi_schema, &mut err)
+    };
+    assert_eq!(rc, 0, "err: {:?}", unsafe { take_err(err) });
+    let imported = Schema::try_from(&ffi_schema).expect("import FFI_ArrowSchema");
+    let columns: Vec<&str> = imported.fields().iter().map(|f| f.name().as_str()).collect();
+    assert_eq!(columns, vec!["id", "name"]);
+}
+
+#[test]
+fn unknown_provider_reports_status_and_message() {
+    let name = b"nope"; // the test expects no provider to answer to this
+    let unknown = DfStr {
+        ptr: name.as_ptr(),
+        len: name.len(),
+    };
+    let (mut out, mut err) = (FFI_ArrowSchema::empty(), ptr::null_mut::<c_char>());
+    let rc = unsafe { df_scan_schema(unknown, EMPTY_BYTES, EMPTY_BYTES, &mut out, &mut err) };
+    assert_eq!(rc, 2 /* DF_UNKNOWN_PROVIDER */);
+    let message = unsafe { take_err(err) }.expect("error message");
+    assert!(message.contains("nope"), "msg was: {message}");
+}
+
+#[test]
+fn create_reports_two_partitions() {
+    demo::register();
+    let scan = create_full_scan();
+    // Out-params written by the C entry point.
+    let (mut partitions, mut err) = (0i32, ptr::null_mut::<c_char>());
+    let rc = unsafe { df_scan_partition_count(scan, &mut partitions, &mut err) };
+    assert_eq!(rc, 0, "err: {:?}", unsafe { take_err(err) });
+    assert_eq!(partitions, 2, "demo provider has two partitions");
+    unsafe { df_scan_close(scan) };
+}
+
+#[test]
+fn execute_partition_roundtrips_arrow_c_stream() {
+    demo::register();
+    let handle = create_full_scan();
+
+    // Sum `id` across both partitions by importing each stream back through
+    // the Arrow C Stream interface, the way a foreign consumer would.
+    let mut total: i64 = 0;
+    let mut rows = 0usize;
+    for partition in 0..2 {
+        let mut stream = FFI_ArrowArrayStream::empty();
+        let mut err: *mut c_char = ptr::null_mut();
+        let status = unsafe { df_scan_execute_partition(handle, partition, &mut stream, &mut err) };
+        assert_eq!(status, 0, "err: {:?}", unsafe { take_err(err) });
+
+        let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }
+            .expect("import FFI_ArrowArrayStream");
+        for batch in reader {
+            let batch = batch.expect("batch");
+            rows += batch.num_rows();
+            let ids = batch
+                .column(0)
+                .as_any()
+                .downcast_ref::<Int64Array>()
+                .expect("id column is Int64");
+            total += ids.values().iter().sum::<i64>();
+        }
+    }
+
+    assert_eq!(rows, 5, "3 + 2 rows across the two partitions");
+    assert_eq!(total, 1 + 2 + 3 + 4 + 5);
+    unsafe { df_scan_close(handle) };
+}
+
+#[test]
+fn limit_caps_row_count() {
+    demo::register();
+    // The demo provider yields 5 rows over two partitions; ask for only 2.
+    let row_limit = 2;
+    let mut scan: *mut DfScanHandle = ptr::null_mut();
+    let mut create_err: *mut c_char = ptr::null_mut();
+    let create_rc = unsafe {
+        df_scan_create(
+            provider(),
+            EMPTY_BYTES,
+            EMPTY_BYTES,
+            0,
+            0,
+            row_limit,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            &mut scan,
+            &mut create_err,
+        )
+    };
+    assert_eq!(create_rc, 0, "err: {:?}", unsafe { take_err(create_err) });
+
+    // Read the whole plan through `df_scan_execute`; the cap must hold across partitions.
+    let mut stream = FFI_ArrowArrayStream::empty();
+    let mut exec_err: *mut c_char = ptr::null_mut();
+    let exec_rc = unsafe { df_scan_execute(scan, &mut stream, &mut exec_err) };
+    assert_eq!(exec_rc, 0, "err: {:?}", unsafe { take_err(exec_err) });
+    let reader = unsafe { ArrowArrayStreamReader::from_raw(&mut stream) }.expect("import");
+    let mut rows = 0usize;
+    for batch in reader {
+        rows += batch.expect("batch").num_rows();
+    }
+    assert_eq!(rows, 2, "limit should cap the scan at 2 rows");
+
+    unsafe { df_scan_close(scan) };
+}
+
+#[test]
+fn close_is_null_safe() {
+ unsafe { df_scan_close(ptr::null_mut()) }; // passes as long as closing a null handle does not crash
+}
+
+/// Create a scan handle on the demo provider with no projection or filters.
+fn create_full_scan() -> *mut DfScanHandle {
+    let mut scan: *mut DfScanHandle = ptr::null_mut();
+    let mut err: *mut c_char = ptr::null_mut();
+    let rc = unsafe {
+        df_scan_create(
+            provider(),
+            EMPTY_BYTES,
+            EMPTY_BYTES,
+            0,
+            0,
+            -1, // limit argument; the limit test passes 2 in this position
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            ptr::null(),
+            0,
+            &mut scan,
+            &mut err,
+        )
+    };
+    assert_eq!(rc, 0, "err: {:?}", unsafe { take_err(err) });
+    assert!(!scan.is_null());
+    scan
+}
diff --git a/native-jni/Cargo.toml b/native-jni/Cargo.toml
new file mode 100644
index 0000000..1001bf6
--- /dev/null
+++ b/native-jni/Cargo.toml
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-scan-jni"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+publish = false
+
+[lib]
+# The JVM-loaded shim. Thin: it marshals Java args into the in-process scan
+# core of `datafusion-scan-ffi` and writes Arrow C Stream / C Schema structs
+# into the addresses arrow-java allocated. All Arrow data crosses via the C
+# Data interface, not through JNI.
+crate-type = ["cdylib"]
+
+[dependencies]
+# The plain-C scan crate, used in-process. `demo-providers` registers the
+# in-memory provider alongside `datafusion.listing` for testing.
+datafusion-scan-ffi = { path = "../native-ffi", features = ["demo-providers"] }
+# Arrow C interface types written into Java-allocated structs.
+arrow = { workspace = true }
+# Decodes the engine's ScanRequest blob.
+prost = { workspace = true }
+jni = { workspace = true }
diff --git a/native-jni/src/lib.rs b/native-jni/src/lib.rs
new file mode 100644
index 0000000..03dd9e8
--- /dev/null
+++ b/native-jni/src/lib.rs
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Thin JNI shim over the plain-C scan core (`datafusion-scan-ffi`).
+//!
+//! This is the JVM's path to the scan ABI. It is deliberately minimal: it
+//! marshals Java arguments (a `String` provider name and two `byte[]` blobs)
+//! into the in-process scan core, hands back an opaque handle as a `jlong`,
+//! and -- for the data plane -- writes a standard `FFI_ArrowArrayStream` (or
+//! `FFI_ArrowSchema`) into the address arrow-java allocated. **No Arrow data
+//! crosses the JNI boundary**: batches flow through the Arrow C Stream
+//! interface, which arrow-java imports with `Data.importArrayStream`.
+//!
+//! Everything here mirrors `core`'s existing `DataFrame` collect path; the only
+//! new ABI is the handful of `Java_org_apache_datafusion_scan_NativeScan_*`
+//! entry points below. Non-Java consumers use the `df_scan_*` C symbols
+//! exported by `datafusion-scan-ffi` instead; this crate is purely the JVM
+//! adapter.
+
+use std::sync::OnceLock;
+
+use arrow::ffi::FFI_ArrowSchema;
+use arrow::ffi_stream::FFI_ArrowArrayStream;
+use datafusion_scan_ffi::proto::ScanRequest as ProtoScanRequest;
+use datafusion_scan_ffi::scan::{self, ScanHandle, ScanRequest};
+use datafusion_scan_ffi::{demo, listing};
+use jni::objects::{JByteArray, JClass, JString};
+use jni::sys::{jint, jlong};
+use jni::JNIEnv;
+use prost::Message;
+
+/// One-time registration of the in-tree `listing` and `demo` providers. The
+/// JVM build registers them here; a non-Java embedder registers its own.
+fn ensure_registered() {
+    static REGISTERED: OnceLock<()> = OnceLock::new();
+    REGISTERED.get_or_init(|| {
+        listing::register();
+        demo::register();
+    });
+}
+
+/// Run `body`; on `Err(message)` throw a Java `RuntimeException` with that
+/// message and return `default`. Panics inside `body` are not caught here.
+fn try_or_throw<T>(
+    env: &mut JNIEnv,
+    default: T,
+    body: impl FnOnce(&mut JNIEnv) -> Result<T, String>,
+) -> T {
+    match body(env) {
+        Ok(value) => value,
+        Err(message) => {
+            // If throwing fails there is nothing more we can do; the default is
+            // still returned so we don't leave the stack in a bad state.
+            let _ = env.throw_new("java/lang/RuntimeException", message);
+            default
+        }
+    }
+}
+
+/// Copy a Java `byte[]` into a `Vec<u8>`; a null array reads as empty.
+fn read_bytes(env: &mut JNIEnv, arr: &JByteArray) -> Result<Vec<u8>, String> {
+    if arr.is_null() {
+        return Ok(Vec::new());
+    }
+    env.convert_byte_array(arr).map_err(|e| e.to_string())
+}
+
+fn read_string(env: &mut JNIEnv, s: &JString) -> Result<String, String> {
+    env.get_string(s).map(Into::into).map_err(|e| e.to_string()) // JNI error -> message text
+}
+
+/// Decode the engine's `ScanRequest` blob into the scan core's request,
+/// borrowing the provider name and config bytes. Empty blob -> no pushdown.
+fn build_request<'a>(
+    provider: &'a str,
+    config: &'a [u8],
+    scan_request: &[u8],
+) -> Result<ScanRequest<'a>, String> {
+    let req = if scan_request.is_empty() {
+        ProtoScanRequest::default()
+    } else {
+        ProtoScanRequest::decode(scan_request)
+            .map_err(|e| format!("failed to decode ScanRequest: {e}"))?
+    };
+    Ok(ScanRequest {
+        provider,
+        options: config,
+        partition: &[],
+        target_partitions: req.target_partitions,
+        batch_size: req.batch_size,
+        limit: req.limit.and_then(|l| usize::try_from(l).ok()), // out of range -> no limit
+        config_overrides: req.config_overrides.into_iter().collect(),
+        projection: req.projection,
+        filters: req.filters,
+    })
+}
+
+/// Look up a provider's output schema and export it as an `FFI_ArrowSchema`
+/// into the `ArrowSchema` struct arrow-java allocated at `schema_addr`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_providerSchema<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    provider: JString<'local>,
+    config: JByteArray<'local>,
+    schema_addr: jlong,
+) {
+    ensure_registered();
+    try_or_throw(&mut env, (), |env| {
+        if schema_addr == 0 {
+            return Err("schema address is null".to_string());
+        }
+        let (name, options) = (read_string(env, &provider)?, read_bytes(env, &config)?);
+        let exported = scan::schema(&name, &options, &[])
+            .map_err(|e| e.message)
+            .and_then(|s| FFI_ArrowSchema::try_from(s.as_ref()).map_err(|e| e.to_string()))?;
+        // SAFETY: arrow-java allocated an empty ArrowSchema at this address.
+        unsafe { (schema_addr as *mut FFI_ArrowSchema).write(exported) };
+        Ok(())
+    })
+}
+
+/// Plan a scan and return the boxed [`ScanHandle`] pointer as an opaque
+/// `jlong`; on error a Java exception is thrown and 0 comes back. Free with `closeScan`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_createScan<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    provider: JString<'local>,
+    config: JByteArray<'local>,
+    scan_request: JByteArray<'local>,
+) -> jlong {
+    ensure_registered();
+    try_or_throw(&mut env, 0, |env| {
+        let (name, options) = (read_string(env, &provider)?, read_bytes(env, &config)?);
+        let request_blob = read_bytes(env, &scan_request)?;
+        let planned = build_request(&name, &options, &request_blob)
+            .and_then(|request| scan::create(request).map_err(|e| e.message))?;
+        let boxed = Box::new(planned);
+        Ok(Box::into_raw(boxed) as jlong)
+    })
+}
+
+/// Number of output partitions of a planned scan, cast to `jint`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_partitionCount<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+) -> jint {
+    try_or_throw(&mut env, 0, |_env| {
+        // A null handle surfaces as the `Err` returned by `handle_ref`.
+        handle_ref(handle).map(|scan| scan.partition_count() as jint)
+    })
+}
+
+/// Run a single partition of the scan and export it as an
+/// `FFI_ArrowArrayStream` into the arrow-java `ArrowArrayStream` at `stream_addr`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_executeStreamPartition<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+    partition: jint,
+    stream_addr: jlong,
+) {
+    try_or_throw(&mut env, (), |_env| {
+        // The conversion fails exactly when `partition` is negative.
+        let index = usize::try_from(partition)
+            .map_err(|_| "partition index is negative".to_string())?;
+        let reader = handle_ref(handle)?
+            .execute_partition(index)
+            .map_err(|e| e.message)?;
+        let exported = FFI_ArrowArrayStream::new(Box::new(reader));
+        write_stream(stream_addr, exported)
+    })
+}
+
+/// Run the entire plan via `execute_all` and export it as one stream at `stream_addr`.
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_executeStream<'local>(
+    mut env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+    stream_addr: jlong,
+) {
+    try_or_throw(&mut env, (), |_env| {
+        let reader = handle_ref(handle)?.execute_all().map_err(|e| e.message)?;
+        let exported = FFI_ArrowArrayStream::new(Box::new(reader));
+        write_stream(stream_addr, exported)
+    })
+}
+
+/// Free a planned scan; a zero handle is ignored. Must not race an in-flight
+/// execute on the same handle (the Java wrapper enforces this).
+#[no_mangle]
+pub extern "system" fn Java_org_apache_datafusion_scan_NativeScan_closeScan<'local>(
+    _env: JNIEnv<'local>,
+    _class: JClass<'local>,
+    handle: jlong,
+) {
+    let raw = handle as *mut ScanHandle;
+    // SAFETY: a non-zero handle came from createScan and is not used afterwards.
+    let owned = (handle != 0).then(|| unsafe { Box::from_raw(raw) });
+    drop(owned);
+}
+
+/// Reinterpret a `jlong` as a shared [`ScanHandle`] reference; zero is an error.
+fn handle_ref<'a>(handle: jlong) -> Result<&'a ScanHandle, String> {
+    match handle {
+        0 => Err("scan handle is null".to_string()),
+        // SAFETY: handle came from createScan and outlives this borrow.
+        raw => Ok(unsafe { &*(raw as *const ScanHandle) }),
+    }
+}
+
+/// Move `ffi` into the stream struct at `stream_addr`; a zero address is an error.
+fn write_stream(stream_addr: jlong, ffi: FFI_ArrowArrayStream) -> Result<(), String> {
+    match stream_addr {
+        0 => Err("stream address is null".to_string()),
+        // SAFETY: arrow-java allocated an empty ArrowArrayStream at this address.
+        addr => Ok(unsafe { (addr as *mut FFI_ArrowArrayStream).write(ffi) }),
+    }
+}
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 0362ae6..c040448 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -17,14 +17,17 @@
[package]
name = "datafusion-jni"
-version = "0.1.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# cdylib JNI artifact loaded by the JVM, not a crates.io library.
publish = false
[lib]
# `rlib` alongside `cdylib` so `cargo test` has a Rust-level harness for
-# native-only invariants (e.g. error-classification routing through wrapped
-# DataFusionError chains). The `cdylib` is still the artifact the JVM loads.
+# native-only invariants (the error-classification tests now live in
+# `datafusion-jni-common`). The `cdylib` is still the artifact the JVM loads.
crate-type = ["cdylib", "rlib"]
[features]
@@ -75,28 +78,27 @@ runtime-metrics = ["dep:tokio-metrics"]
spark = ["dep:datafusion-spark"]
[dependencies]
-arrow = { version = "58", features = ["ffi"] }
-async-trait = "0.1"
-datafusion = { version = "53.1.0", features = ["avro"] }
-datafusion-proto = "53.1.0"
+arrow = { workspace = true }
+async-trait = { workspace = true }
+datafusion = { workspace = true, features = ["avro"] }
+# Shared JNI plumbing (error->exception mapping, runtime singleton,
+# StreamingReader). `avro` keeps the classifier's AvroError->IoException arm
+# in sync with the `avro` feature on `datafusion` above.
+datafusion-jni-common = { path = "../native-common", features = ["avro"] }
+datafusion-proto = { workspace = true }
# Apache Spark-compatible functions + expression planners. Optional and
# gated behind the `spark` feature (in the default set). The `core` feature
# of the crate is what exposes `SessionStateBuilderSpark`.
-datafusion-spark = { version = "53.1.0", features = ["core"], optional = true }
-datafusion-substrait = { version = "53.1.0", optional = true }
-futures = "0.3"
-jni = "0.21"
-# Pin to the same major as DataFusion 53.1 pulls in transitively (0.13.x)
-# so we share the same `dyn ObjectStore` vtable and don't double-link.
-object_store = { version = "0.13", default-features = false }
-prost = "0.14"
-tokio = { version = "1", features = ["rt-multi-thread"] }
-# Tokio runtime metrics. Optional + cfg-gated: this crate's API surface lives
-# behind `--cfg tokio_unstable`, so enabling the `runtime-metrics` feature also
-# requires the caller to set `RUSTFLAGS="--cfg tokio_unstable"` at build time.
-tokio-metrics = { version = "0.5", optional = true }
-url = "2"
+datafusion-spark = { workspace = true, features = ["core"], optional = true }
+datafusion-substrait = { workspace = true, optional = true }
+futures = { workspace = true }
+jni = { workspace = true }
+object_store = { workspace = true }
+prost = { workspace = true }
+tokio = { workspace = true }
+tokio-metrics = { workspace = true, optional = true }
+url = { workspace = true }
[build-dependencies]
-prost-build = "0.14"
-protoc-bin-vendored = "3"
+prost-build = { workspace = true }
+protoc-bin-vendored = { workspace = true }
diff --git a/native/src/arrow.rs b/native/src/arrow.rs
index 2bbe7b0..67e5caf 100644
--- a/native/src/arrow.rs
+++ b/native/src/arrow.rs
@@ -23,10 +23,10 @@ use jni::sys::jlong;
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::ArrowReadOptionsProto;
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_arrow_options(
env: &mut JNIEnv,
diff --git a/native/src/avro.rs b/native/src/avro.rs
index 85d4a07..257ae32 100644
--- a/native/src/avro.rs
+++ b/native/src/avro.rs
@@ -23,10 +23,10 @@ use jni::sys::jlong;
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::AvroReadOptionsProto;
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_avro_options(
env: &mut JNIEnv,
diff --git a/native/src/cache_manager.rs b/native/src/cache_manager.rs
index 3b9e286..ec38dc8 100644
--- a/native/src/cache_manager.rs
+++ b/native/src/cache_manager.rs
@@ -34,8 +34,8 @@ use datafusion::execution::cache::cache_unit::{
};
use datafusion::execution::cache::DefaultListFilesCache;
-use crate::errors::JniResult;
use crate::proto_gen::CacheManagerOptionsProto;
+use datafusion_jni_common::errors::JniResult;
/// Build a [`CacheManagerConfig`] from the proto. Returns `Ok(None)` if the
/// caller did not set any cache-manager field, so the JNI layer can skip the
diff --git a/native/src/csv.rs b/native/src/csv.rs
index 3ae4627..b79ed59 100644
--- a/native/src/csv.rs
+++ b/native/src/csv.rs
@@ -26,12 +26,12 @@ use jni::sys::jlong;
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::{
CsvReadOptionsProto, CsvWriteOptionsProto, FileCompressionType as ProtoFileCompressionType,
};
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_csv_options(
env: &mut JNIEnv,
diff --git a/native/src/json.rs b/native/src/json.rs
index 8eea32f..b87be78 100644
--- a/native/src/json.rs
+++ b/native/src/json.rs
@@ -27,12 +27,12 @@ use jni::sys::jlong;
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::{
FileCompressionType as ProtoFileCompressionType, JsonWriteOptionsProto, NdJsonReadOptionsProto,
};
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_json_options(
env: &mut JNIEnv,
diff --git a/native/src/lib.rs b/native/src/lib.rs
index 43161d2..56bef5d 100644
--- a/native/src/lib.rs
+++ b/native/src/lib.rs
@@ -19,7 +19,6 @@ mod arrow;
mod avro;
mod cache_manager;
mod csv;
-mod errors;
mod jni_util;
mod json;
mod memory;
@@ -34,16 +33,13 @@ pub(crate) mod proto_gen {
include!(concat!(env!("OUT_DIR"), "/datafusion_java.rs"));
}
-use std::panic::{catch_unwind, AssertUnwindSafe};
use std::path::PathBuf;
use std::sync::{Arc, OnceLock};
-use datafusion::arrow::array::RecordBatch;
use datafusion::arrow::datatypes::SchemaRef;
-use datafusion::arrow::error::ArrowError;
use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream;
use datafusion::arrow::ipc::writer::StreamWriter;
-use datafusion::arrow::record_batch::{RecordBatchIterator, RecordBatchReader};
+use datafusion::arrow::record_batch::RecordBatchIterator;
use datafusion::common::{JoinType, UnnestOptions};
use datafusion::config::TableParquetOptions;
use datafusion::dataframe::DataFrame;
@@ -51,11 +47,9 @@ use datafusion::dataframe::DataFrameWriteOptions;
use datafusion::error::DataFusionError;
use datafusion::execution::disk_manager::{DiskManagerBuilder, DiskManagerMode};
use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};
-use datafusion::execution::SendableRecordBatchStream;
use datafusion::logical_expr::Expr;
use datafusion::logical_expr::{col, Partitioning, ScalarUDF, Signature, SortExpr};
use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
-use futures::StreamExt;
use jni::objects::{JBooleanArray, JByteArray, JClass, JObject, JObjectArray, JString};
use jni::sys::{jboolean, jbyte, jbyteArray, jint, jlong};
use jni::JNIEnv;
@@ -63,7 +57,10 @@ use jni::JavaVM;
use prost::Message;
use tokio::runtime::Runtime;
-use crate::errors::{try_unwrap_or_throw, JniResult};
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
+// Re-exported so sibling modules keep their crate-local `crate::StreamingReader` path.
+pub(crate) use datafusion_jni_common::StreamingReader;
+
use crate::proto_gen::ParquetReadOptionsProto;
use crate::proto_gen::SessionOptions;
use crate::schema::decode_optional_schema;
@@ -84,18 +81,15 @@ pub(crate) fn jvm() -> &'static JavaVM {
}
pub(crate) fn runtime() -> &'static Runtime {
- static RT: OnceLock<Runtime> = OnceLock::new();
- RT.get_or_init(|| {
- let rt = Runtime::new().expect("failed to create Tokio runtime");
- // Eagerly install the runtime-metrics accumulator (no-op when the
- // `runtime-metrics` Cargo feature is off). Initialising here -- not
- // lazily on the first `runtimeStats()` call -- means the
- // RuntimeMonitor's sampling baseline coincides with runtime start, so
- // poll/park/busy totals reflect activity from the first query onward
- // rather than from the first observation.
- crate::runtime_metrics::init(rt.handle());
- rt
- })
+ // The singleton itself lives in datafusion-jni-common (shared with the
+ // datafusion-spark-bridge SDK; each cdylib statically links its own
+ // copy, so the runtime stays per-library). The init hook eagerly installs the
+ // runtime-metrics accumulator (no-op when the `runtime-metrics` Cargo
+ // feature is off). Initialising here -- not lazily on the first
+ // `runtimeStats()` call -- means the RuntimeMonitor's sampling baseline
+ // coincides with runtime start, so poll/park/busy totals reflect activity
+ // from the first query onward rather than from the first observation.
+ datafusion_jni_common::runtime_with_init(crate::runtime_metrics::init)
}
/// Wrap the (already-built) `RuntimeEnvBuilder`'s memory pool with a
@@ -324,50 +318,6 @@ pub extern "system" fn Java_org_apache_datafusion_DataFrame_collectDataFrame<'lo
})
}
-/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous
-/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore
-/// the Java `ArrowReader`) consumes. Each call to `next()` drives one
-/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the
-/// executor pipeline plus a single in-flight batch.
-struct StreamingReader {
- schema: SchemaRef,
- stream: SendableRecordBatchStream,
-}
-
-impl Iterator for StreamingReader {
- type Item = Result<RecordBatch, ArrowError>;
-
- fn next(&mut self) -> Option<Self::Item> {
- // Arrow's C ABI invokes this iterator through FFI_ArrowArrayStream's
- // vtable, outside the JNI handler's try_unwrap_or_throw guard. A panic
- // here (buggy UDF, arrow cast that panics, runtime poison) would
- // unwind across C/FFI -- undefined behaviour. Catch it and surface as
- // an ArrowError so the Java side sees a normal exception instead.
- let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
- match next {
- Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
- Err(panic) => {
- let msg = if let Some(s) = panic.downcast_ref::<String>() {
- s.clone()
- } else if let Some(s) = panic.downcast_ref::<&str>() {
- (*s).to_string()
- } else {
- "rust panic with non-string payload".to_string()
- };
- Some(Err(ArrowError::ExternalError(
- format!("panic in DataFrame stream: {msg}").into(),
- )))
- }
- }
- }
-}
-
-impl RecordBatchReader for StreamingReader {
- fn schema(&self) -> SchemaRef {
- self.schema.clone()
- }
-}
-
#[no_mangle]
pub extern "system" fn Java_org_apache_datafusion_DataFrame_executeStreamDataFrame<'local>(
mut env: JNIEnv<'local>,
diff --git a/native/src/object_store.rs b/native/src/object_store.rs
index eefccf2..985d721 100644
--- a/native/src/object_store.rs
+++ b/native/src/object_store.rs
@@ -28,9 +28,9 @@ use std::sync::Arc;
use datafusion::prelude::SessionContext;
use url::Url;
-use crate::errors::JniResult;
use crate::proto_gen::object_store_registration::Backend;
use crate::proto_gen::ObjectStoreRegistration;
+use datafusion_jni_common::errors::JniResult;
#[cfg(feature = "object-store-gcp")]
use crate::proto_gen::GcsOptions;
diff --git a/native/src/proto.rs b/native/src/proto.rs
index 4f187bc..c1315f9 100644
--- a/native/src/proto.rs
+++ b/native/src/proto.rs
@@ -28,8 +28,8 @@ use jni::sys::{jbyteArray, jlong};
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::runtime;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
#[no_mangle]
pub extern "system" fn Java_org_apache_datafusion_SessionContext_createDataFrameFromProto<
diff --git a/native/src/runtime_metrics.rs b/native/src/runtime_metrics.rs
index e69410e..dd60dcb 100644
--- a/native/src/runtime_metrics.rs
+++ b/native/src/runtime_metrics.rs
@@ -38,7 +38,7 @@
//! 10 totalOverflowCount
#[cfg(not(feature = "runtime-metrics"))]
-use crate::errors::JniResult;
+use datafusion_jni_common::errors::JniResult;
/// Number of i64 values in the snapshot array; kept here so the Java side and
/// the feature-off stub agree on the layout.
@@ -51,7 +51,7 @@ mod imp {
use tokio_metrics::{RuntimeIntervals, RuntimeMonitor};
use super::STATS_FIELD_COUNT;
- use crate::errors::JniResult;
+ use datafusion_jni_common::errors::JniResult;
/// `RuntimeMonitor::intervals().next()` returns *delta* metrics covering
/// the period since the previous call (or, on the very first call, since
@@ -196,7 +196,7 @@ pub fn runtime_stats() -> JniResult<[i64; STATS_FIELD_COUNT]> {
Err(
"datafusion-jni was built without the `runtime-metrics` Cargo feature; \
rebuild the native crate with \
- `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build --features runtime-metrics` \
+ `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build -p datafusion-jni --features runtime-metrics` \
to enable SessionContext.runtimeStats"
.into(),
)
diff --git a/native/src/schema.rs b/native/src/schema.rs
index 968a73a..0c3c7ab 100644
--- a/native/src/schema.rs
+++ b/native/src/schema.rs
@@ -20,7 +20,7 @@ use datafusion::arrow::ipc::reader::StreamReader;
use jni::objects::JByteArray;
use jni::JNIEnv;
-use crate::errors::JniResult;
+use datafusion_jni_common::errors::JniResult;
/// Decode an optional Arrow-IPC schema byte array passed in from Java.
/// Returns `None` if the byte-array reference is null.
diff --git a/pom.xml b/pom.xml
index 6210841..a48be6c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -33,6 +33,7 @@ under the License.
     <module>core</module>
     <module>examples</module>
+    <module>spark</module>
@@ -95,6 +96,11 @@ under the License.
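+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-compiler-plugin</artifactId>
+          <version>3.13.0</version>
+        </plugin>
         <plugin>
           <groupId>org.apache.maven.plugins</groupId>
           <artifactId>maven-surefire-plugin</artifactId>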
@@ -173,10 +179,10 @@ under the License.
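             <exclude>.mvn/**</exclude>
             <exclude>**/target/**</exclude>
-            <exclude>native/target/**</exclude>
+            <exclude>rust-target/**</exclude>
             <exclude>tpch-data/**</exclude>
-
-            <exclude>native/Cargo.lock</exclude>
+
+            <exclude>Cargo.lock</exclude>
             <exclude>dev/release/rat_exclude_files.txt</exclude>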
diff --git a/proto/scan_config.proto b/proto/scan_config.proto
new file mode 100644
index 0000000..43593bf
--- /dev/null
+++ b/proto/scan_config.proto
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+syntax = "proto3";
+
+package datafusion_java;
+
+import "csv_read_options.proto";
+import "json_read_options.proto";
+import "parquet_read_options.proto";
+import "avro_read_options.proto";
+import "arrow_read_options.proto";
+
+option java_package = "org.apache.datafusion.protobuf";
+option java_multiple_files = true;
+
+// Configuration for a scan provider. It travels in the `options` byte blob of
+// the plain-C scan ABI (`df_scan_schema` / `df_scan_create`), which treats the
+// bytes as opaque and leaves decoding to the registered provider builder named
+// by `provider`. In-tree builders share this encoding; a custom builder is
+// free to ignore it and define its own.
+//
+// `provider` names the registered builder (e.g. "datafusion.listing",
+// "datafusion.memory"), `source` holds that builder's parameters, and `custom`
+// is the escape hatch for builders with their own wire format.
+message ScanConfig {
+  string provider = 1;
+
+  oneof source {
+    ListingSource listing = 2;
+    bytes custom = 15;
+  }
+}
+
+// A listing source backed by files: one or more paths/URIs, all read with the
+// same file format, mirroring the inputs of DataFusion's ListingTable.
+// Object-store credentials and endpoints are not part of this message; they
+// are configured out of band (registered on the context by the embedding cdylib).
+message ListingSource {
+  // Files or directories to read. Globs and object-store URIs (s3://, gs://,
+  // ...) are accepted wherever the registered object store supports them.
+  repeated string paths = 1;
+
+  // File format together with its read options. The existing per-format option
+  // messages are reused so encoders are shared with the rest of the binding.
+  oneof format {
+    CsvReadOptionsProto csv = 2;
+    NdJsonReadOptionsProto json = 3;
+    ParquetReadOptionsProto parquet = 4;
+    AvroReadOptionsProto avro = 5;
+    ArrowReadOptionsProto arrow = 6;
+  }
+
+  // Explicit schema, if any, as Arrow IPC schema-message bytes. When unset the
+  // provider infers it (e.g. from Parquet metadata or by sampling).
+  optional bytes schema_ipc = 7;
+}
+
+// Describes one partition's slice; carried in the `partition` byte blob and
+// empty for a whole-table scan. The driver uses `index` to hand each executor
+// task its slice, while `opaque` is builder-defined (e.g. a serialized file
+// group) so a provider can partition freely without the ABI knowing the shape.
+message ScanPartition {
+  uint32 index = 1;
+  bytes opaque = 2;
+}
diff --git a/proto/scan_request.proto b/proto/scan_request.proto
new file mode 100644
index 0000000..1770ee1
--- /dev/null
+++ b/proto/scan_request.proto
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+syntax = "proto3";
+
+package datafusion_java;
+
+option java_package = "org.apache.datafusion.protobuf";
+option java_multiple_files = true;
+
+// The pushdown a query engine (Spark DataSourceV2, etc.) captures for a scan.
+//
+// This is the *staging* object the engine populates during planning. It maps
+// onto the typed arguments of `df_scan_create` rather than being passed as a
+// single blob: the JNI shim / FFM layer decodes a ScanRequest and explodes it
+// into the call's `projection` / `filters` / `target_partitions` / ...
+// arguments. Keeping the C ABI's arguments typed (not one opaque protobuf)
+// keeps it FFM-friendly and language-neutral; this message just gives the
+// engine one structured thing to build and serialize across its own layers
+// (e.g. driver -> executor task) before the shim makes the native call.
+//
+// It is deliberately NOT the provider config: which provider and its
+// parameters live in ScanConfig (the `options` blob). A ScanRequest is purely
+// "given that provider, here is what to read."
+message ScanRequest {
+  // Pruned columns to project, by name. Empty selects all columns. Names
+  // match the provider's (pre-widening) output schema.
+  repeated string projection = 1;
+
+  // Pushed filters, each a serialized `datafusion.LogicalExprNode` (the same
+  // encoding `datafusion-ffi` uses). The engine translates whichever of its
+  // own predicates it can express and leaves the rest for itself to apply.
+  // The provider receives them as a conjunction (AND).
+  repeated bytes filters = 2;
+
+  // Optional row limit pushed into the scan. Unset means no limit. Advisory:
+  // the engine must still enforce its own limit, since not every plan honors
+  // it exactly.
+  optional uint64 limit = 3;
+
+  // Execution tuning resolved once on the driver and shipped to every executor
+  // so partition counts stay deterministic. <= 0 leaves the DataFusion
+  // default in place (matches the C ABI's convention).
+  int32 target_partitions = 4;
+  int32 batch_size = 5;
+
+  // Session config overrides applied to the scan's private context, e.g.
+  // {"datafusion.execution.parquet.pushdown_filters": "true"}. Resolved on the
+  // driver alongside the tuning above.
+  map<string, string> config_overrides = 6;
+}
diff --git a/spark/pom.xml b/spark/pom.xml
new file mode 100644
index 0000000..26af4f1
--- /dev/null
+++ b/spark/pom.xml
@@ -0,0 +1,116 @@
+
+
+
+ 4.0.0
+
+
+ org.apache.datafusion
+ datafusion-java-parent
+ 0.2.0-SNAPSHOT
+
+
+ datafusion-spark
+ DataFusion Spark DataSource
+ A Spark DataSourceV2 backed by a DataFusion TableProvider via the plain-C scan ABI.
+
+
+ 4.0.0
+ 2.13
+
+ 18.1.0
+
+
+
+
+
+ org.apache.datafusion
+ datafusion-java
+ ${project.version}
+
+
+ org.apache.arrow
+ *
+
+
+
+
+
+
+ org.apache.spark
+ spark-sql_${scala.binary.version}
+ ${spark.version}
+ provided
+
+
+
+
+ org.apache.arrow
+ arrow-c-data
+ ${spark.arrow.version}
+ provided
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ test
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+
+
+
+ -Djava.library.path=${maven.multiModuleProjectDirectory}/rust-target/debug
+ --add-opens=java.base/java.lang=ALL-UNNAMED
+ --add-opens=java.base/java.lang.invoke=ALL-UNNAMED
+ --add-opens=java.base/java.io=ALL-UNNAMED
+ --add-opens=java.base/java.net=ALL-UNNAMED
+ --add-opens=java.base/java.nio=ALL-UNNAMED
+ --add-opens=java.base/java.util=ALL-UNNAMED
+ --add-opens=java.base/java.util.concurrent=ALL-UNNAMED
+ --add-opens=java.base/sun.nio.ch=ALL-UNNAMED
+ --add-opens=java.base/sun.security.action=ALL-UNNAMED
+
+
+
+
+
+
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java
new file mode 100644
index 0000000..7dbb27b
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionColumnarPartitionReader.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowReader;
+import org.apache.datafusion.scan.DatafusionScan;
+import org.apache.spark.sql.connector.read.PartitionReader;
+import org.apache.spark.sql.vectorized.ArrowColumnVector;
+import org.apache.spark.sql.vectorized.ColumnVector;
+import org.apache.spark.sql.vectorized.ColumnarBatch;
+
+/**
+ * Reads one scan partition as Spark {@link ColumnarBatch}es, zero-copy.
+ *
+ * <p>The Arrow vectors imported from the native stream are wrapped directly in Spark {@link
+ * ArrowColumnVector}s -- no per-cell copy. This requires the executor JVM to have a single
+ * arrow-java (the cluster's Spark Arrow); the connector compiles against that version and never
+ * bundles its own, so our import and Spark's {@code ArrowColumnVector} share the same classes.
+ *
+ * <p>Lifecycle: the underlying Arrow vectors are owned by the {@link ArrowReader}. We do not close
+ * the {@link ColumnarBatch} (which would close those vectors a second time); {@link #close()}
+ * closes the reader -- freeing the vectors once -- then the scan, and then the allocator.
+ */
+final class DatafusionColumnarPartitionReader implements PartitionReader<ColumnarBatch> {
+
+  private final BufferAllocator allocator;
+  private final DatafusionScan scan;
+  private final ArrowReader reader;
+  private final VectorSchemaRoot root;
+  private final ColumnarBatch batch;
+
+  /**
+   * Opens the scan for {@code partition} and wraps the reader's root in the reusable batch. If a
+   * step fails, whatever was already opened (reader, scan, allocator) is closed before rethrowing.
+   */
+  DatafusionColumnarPartitionReader(DatafusionInputPartition partition) {
+    this.allocator = new RootAllocator();
+    DatafusionScan openedScan = null;
+    ArrowReader openedReader = null;
+    try {
+      openedScan =
+          DatafusionScan.create(partition.provider, partition.config, partition.scanRequest);
+      openedReader = openedScan.executePartition(allocator, partition.index);
+      this.root = openedReader.getVectorSchemaRoot();
+      this.batch = new ColumnarBatch(wrap(root));
+    } catch (IOException e) {
+      closeAll(e, openedReader, openedScan, allocator);
+      throw new RuntimeException("failed to open scan partition " + partition.index, e);
+    } catch (RuntimeException e) {
+      closeAll(e, openedReader, openedScan, allocator);
+      throw e;
+    }
+    this.scan = openedScan;
+    this.reader = openedReader;
+  }
+
+  /** Closes the non-null resources in order, recording close failures on {@code failure}. */
+  private static void closeAll(Throwable failure, AutoCloseable... resources) {
+    for (AutoCloseable resource : resources) {
+      try {
+        if (resource != null) {
+          resource.close();
+        }
+      } catch (Exception suppressed) {
+        failure.addSuppressed(suppressed);
+      }
+    }
+  }
+
+  /** Wrap each Arrow vector of the (reused) root as a Spark column vector, once. */
+  private static ColumnVector[] wrap(VectorSchemaRoot root) {
+    ColumnVector[] columns = new ColumnVector[root.getFieldVectors().size()];
+    int i = 0;
+    for (FieldVector vector : root.getFieldVectors()) {
+      columns[i++] = new ArrowColumnVector(vector);
+    }
+    return columns;
+  }
+
+  @Override
+  public boolean next() throws IOException {
+    // The root's vectors are reloaded in place each batch; skip empty batches.
+    while (reader.loadNextBatch()) {
+      int rows = root.getRowCount();
+      if (rows > 0) {
+        batch.setNumRows(rows);
+        return true;
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public ColumnarBatch get() {
+    return batch;
+  }
+
+  @Override
+  public void close() throws IOException {
+    try {
+      reader.close();
+    } finally {
+      try {
+        scan.close();
+      } finally {
+        allocator.close();
+      }
+    }
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java
new file mode 100644
index 0000000..8152aad
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionInputPartition.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.spark.sql.connector.read.InputPartition;
+
+/**
+ * One executor-bound slice of a scan. It holds only the provider name, two byte blobs and a
+ * partition index; native handles are deliberately absent because they would be meaningless in
+ * another process. The executor rebuilds the provider from {@code config} and runs {@code index}.
+ */
+final class DatafusionInputPartition implements InputPartition {
+
+  private static final long serialVersionUID = 1L;
+
+  final String provider;
+  final byte[] config;
+  final byte[] scanRequest;
+  final int index;
+
+  DatafusionInputPartition(String provider, byte[] config, byte[] scanRequest, int index) {
+    this.index = index;
+    this.scanRequest = scanRequest;
+    this.config = config;
+    this.provider = provider;
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java
new file mode 100644
index 0000000..2442eb2
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionPartitionReaderFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.read.InputPartition;
+import org.apache.spark.sql.connector.read.PartitionReader;
+import org.apache.spark.sql.connector.read.PartitionReaderFactory;
+import org.apache.spark.sql.vectorized.ColumnarBatch;
+
+/**
+ * Creates a columnar reader per partition. Serialized to executors, so it holds no state.
+ *
+ * <p>Reads are columnar: {@link #supportColumnarReads} returns true, so Spark calls {@link
+ * #createColumnarReader} and consumes Arrow buffers directly via {@link
+ * DatafusionColumnarPartitionReader}. The row reader is unsupported.
+ */
+final class DatafusionPartitionReaderFactory implements PartitionReaderFactory {
+
+  private static final long serialVersionUID = 1L;
+
+  @Override
+  public boolean supportColumnarReads(InputPartition partition) {
+    return true;
+  }
+
+  @Override
+  public PartitionReader<ColumnarBatch> createColumnarReader(InputPartition partition) {
+    return new DatafusionColumnarPartitionReader((DatafusionInputPartition) partition);
+  }
+
+  @Override
+  public PartitionReader<InternalRow> createReader(InputPartition partition) {
+    throw new UnsupportedOperationException("datafusion source reads are columnar");
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java
new file mode 100644
index 0000000..9cafd37
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanBuilder.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.List;
+
+import org.apache.datafusion.protobuf.ScanRequest;
+import org.apache.spark.sql.connector.read.Scan;
+import org.apache.spark.sql.connector.read.ScanBuilder;
+import org.apache.spark.sql.connector.read.SupportsPushDownFilters;
+import org.apache.spark.sql.connector.read.SupportsPushDownLimit;
+import org.apache.spark.sql.connector.read.SupportsPushDownRequiredColumns;
+import org.apache.spark.sql.sources.Filter;
+import org.apache.spark.sql.types.StructType;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Captures Spark's projection, filter, and limit pushdown, encoding them into the {@code
+ * ScanRequest} the scan ABI consumes.
+ */
+final class DatafusionScanBuilder
+    implements ScanBuilder,
+        SupportsPushDownRequiredColumns,
+        SupportsPushDownFilters,
+        SupportsPushDownLimit {
+
+  private final String provider;
+  private final byte[] config;
+
+  private StructType requiredSchema;
+  private Filter[] pushedFilters = new Filter[0];
+  private List<byte[]> pushedFilterBytes = List.of();
+  private int limit = -1; // negative: no limit has been pushed
+
+  DatafusionScanBuilder(StructType fullSchema, String provider, byte[] config) {
+    this.provider = provider;
+    this.config = config;
+    this.requiredSchema = fullSchema;
+  }
+
+  @Override
+  public void pruneColumns(StructType requiredSchema) {
+    this.requiredSchema = requiredSchema;
+  }
+
+  @Override
+  public Filter[] pushFilters(Filter[] filters) {
+    SparkFilters.Result result = SparkFilters.split(filters);
+    this.pushedFilters = result.pushedFilters();
+    this.pushedFilterBytes = result.pushed();
+    return result.postScan();
+  }
+
+  @Override
+  public Filter[] pushedFilters() {
+    return pushedFilters;
+  }
+
+  @Override
+  public boolean pushLimit(int limit) {
+    // DataFusion enforces the limit exactly (df.limit after filters), and a
+    // limited plan coalesces to a single output partition, so the total row
+    // count is bounded. Report it as fully handled.
+    this.limit = limit;
+    return true;
+  }
+
+  @Override
+  public Scan build() {
+    ScanRequest.Builder request = ScanRequest.newBuilder();
+    for (String name : requiredSchema.fieldNames()) {
+      request.addProjection(name);
+    }
+    for (byte[] filter : pushedFilterBytes) {
+      request.addFilters(ByteString.copyFrom(filter));
+    }
+    if (limit >= 0) {
+      request.setLimit(limit);
+    }
+    return new DatafusionScanImpl(provider, config, request.build().toByteArray(), requiredSchema);
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java
new file mode 100644
index 0000000..3a48fba
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionScanImpl.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.datafusion.scan.DatafusionScan;
+import org.apache.spark.sql.connector.read.Batch;
+import org.apache.spark.sql.connector.read.InputPartition;
+import org.apache.spark.sql.connector.read.PartitionReaderFactory;
+import org.apache.spark.sql.connector.read.Scan;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * The Spark {@link Scan} and {@link Batch} for a planned DataFusion scan.
+ *
+ * <p>{@link #planInputPartitions()} runs on the driver. It opens the scan once, only to learn the
+ * partition count, then emits one serializable {@link DatafusionInputPartition} per partition
+ * holding the config and request bytes (never a native handle). Executors rebuild their own scan.
+ */
+final class DatafusionScanImpl implements Scan, Batch {
+
+  private final String provider;
+  private final byte[] config;
+  private final byte[] scanRequest;
+  private final StructType readSchema;
+
+  DatafusionScanImpl(String provider, byte[] config, byte[] scanRequest, StructType readSchema) {
+    this.readSchema = readSchema;
+    this.scanRequest = scanRequest;
+    this.config = config;
+    this.provider = provider;
+  }
+
+  /** Exposes the serialized ScanRequest; package-private so pushdown unit tests can inspect it. */
+  byte[] scanRequestBytes() {
+    return scanRequest;
+  }
+
+  @Override
+  public StructType readSchema() {
+    return readSchema;
+  }
+
+  @Override
+  public Batch toBatch() {
+    return this;
+  }
+
+  @Override
+  public InputPartition[] planInputPartitions() {
+    final int count;
+    try (DatafusionScan probe = DatafusionScan.create(provider, config, scanRequest)) {
+      count = probe.partitionCount();
+    }
+    InputPartition[] slices = new InputPartition[count];
+    for (int index = 0; index < slices.length; index++) {
+      slices[index] = new DatafusionInputPartition(provider, config, scanRequest, index);
+    }
+    return slices;
+  }
+
+  @Override
+  public PartitionReaderFactory createReaderFactory() {
+    return new DatafusionPartitionReaderFactory();
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java
new file mode 100644
index 0000000..d2e8f9d
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTable.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.EnumSet;
+import java.util.Set;
+
+import org.apache.spark.sql.connector.catalog.SupportsRead;
+import org.apache.spark.sql.connector.catalog.TableCapability;
+import org.apache.spark.sql.connector.read.ScanBuilder;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/** A readable table over a DataFusion provider; produces {@link DatafusionScanBuilder}s. */
+final class DatafusionTable implements SupportsRead {
+
+  private final StructType schema;
+  private final String provider;
+  private final byte[] config;
+
+  DatafusionTable(StructType schema, String provider, byte[] config) {
+    this.schema = schema;
+    this.provider = provider;
+    this.config = config;
+  }
+
+  @Override
+  public String name() {
+    return "datafusion";
+  }
+
+  @Override
+  public StructType schema() {
+    return schema;
+  }
+
+  @Override
+  public Set<TableCapability> capabilities() {
+    return EnumSet.of(TableCapability.BATCH_READ);
+  }
+
+  @Override
+  public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) {
+    return new DatafusionScanBuilder(schema, provider, config);
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java
new file mode 100644
index 0000000..5d837d5
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/DatafusionTableProvider.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.datafusion.scan.DatafusionScan;
+import org.apache.spark.sql.connector.catalog.Table;
+import org.apache.spark.sql.connector.catalog.TableProvider;
+import org.apache.spark.sql.connector.expressions.Transform;
+import org.apache.spark.sql.sources.DataSourceRegister;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/**
+ * Entry point for the {@code datafusion} Spark data source.
+ *
+ * <p>Registered via {@code DataSourceRegister} so {@code
+ * spark.read.format("datafusion").option("path", ...).load()} resolves here. Options are decoded
+ * into a {@code ScanConfig} ({@link OptionsCodec}); the schema is probed once, on the driver,
+ * through {@link DatafusionScan#schema}.
+ */
+public final class DatafusionTableProvider implements TableProvider, DataSourceRegister {
+
+  @Override
+  public String shortName() {
+    return "datafusion";
+  }
+
+  @Override
+  public StructType inferSchema(CaseInsensitiveStringMap options) {
+    OptionsCodec.Source source = OptionsCodec.fromOptions(options);
+    try (BufferAllocator allocator = new RootAllocator()) {
+      Schema arrow = DatafusionScan.schema(allocator, source.provider(), source.config());
+      return SchemaConverter.toSparkSchema(arrow);
+    }
+  }
+
+  @Override
+  public Table getTable(
+      StructType schema, Transform[] partitioning, Map<String, String> properties) {
+    OptionsCodec.Source source = OptionsCodec.fromOptions(new CaseInsensitiveStringMap(properties));
+    return new DatafusionTable(schema, source.provider(), source.config());
+  }
+
+  @Override
+  public boolean supportsExternalMetadata() {
+    return false;
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java b/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java
new file mode 100644
index 0000000..7aaed5c
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/OptionsCodec.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.Locale;
+
+import org.apache.datafusion.protobuf.CsvReadOptionsProto;
+import org.apache.datafusion.protobuf.ListingSource;
+import org.apache.datafusion.protobuf.NdJsonReadOptionsProto;
+import org.apache.datafusion.protobuf.ParquetReadOptionsProto;
+import org.apache.datafusion.protobuf.ScanConfig;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/**
+ * Builds the {@code ScanConfig} for the {@code datafusion.listing} provider from Spark
+ * data-source options.
+ *
+ * <p>Options understood: {@code path} (mandatory); {@code format}, one of {@code
+ * csv|parquet|json}, falling back to the path's extension and finally to {@code csv}; and, for
+ * CSV only, {@code header} (default true) and {@code delimiter} (default {@code ,}).
+ */
+final class OptionsCodec {
+
+  static final String PROVIDER = "datafusion.listing";
+
+  private OptionsCodec() {}
+
+  /** Pairs the provider name with the serialized ScanConfig that the listing builder decodes. */
+  record Source(String provider, byte[] config) {}
+
+  static Source fromOptions(CaseInsensitiveStringMap options) {
+    String path = options.get("path");
+    if (path == null || path.isEmpty()) {
+      throw new IllegalArgumentException("the 'datafusion' source requires a 'path' option");
+    }
+    final String format;
+    if (options.containsKey("format")) {
+      format = options.get("format");
+    } else {
+      format = inferFormat(path);
+    }
+
+    ListingSource.Builder source = ListingSource.newBuilder().addPaths(path);
+    switch (format.toLowerCase(Locale.ROOT)) {
+      case "csv" -> {
+        CsvReadOptionsProto.Builder csv = CsvReadOptionsProto.newBuilder();
+        csv.setHasHeader(options.getBoolean("header", true));
+        csv.setDelimiter(delimiter(options));
+        csv.setQuote('"').setFileExtension(".csv");
+        source.setCsv(csv.build());
+      }
+      case "parquet" ->
+          source.setParquet(
+              ParquetReadOptionsProto.newBuilder().setFileExtension(".parquet").build());
+      case "json" ->
+          source.setJson(NdJsonReadOptionsProto.newBuilder().setFileExtension(".json").build());
+      default -> throw new IllegalArgumentException("unsupported format: " + format);
+    }
+
+    ScanConfig scanConfig =
+        ScanConfig.newBuilder().setProvider(PROVIDER).setListing(source.build()).build();
+    return new Source(PROVIDER, scanConfig.toByteArray());
+  }
+
+  private static int delimiter(CaseInsensitiveStringMap options) {
+    String raw = options.containsKey("delimiter") ? options.get("delimiter") : ",";
+    if (raw.length() == 1) {
+      return raw.charAt(0);
+    }
+    throw new IllegalArgumentException("delimiter must be a single character, got: " + raw);
+  }
+
+  /** Guesses the format from the path's extension. */
+  private static String inferFormat(String path) {
+    String normalized = path.toLowerCase(Locale.ROOT);
+    if (normalized.endsWith(".parquet")) {
+      return "parquet";
+    }
+    // Anything that is neither Parquet nor JSON is treated as CSV.
+    return normalized.endsWith(".json") ? "json" : "csv";
+  }
+}
diff --git a/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java b/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java
new file mode 100644
index 0000000..d61d9c4
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/SchemaConverter.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * Converts an Arrow schema (produced by the scan ABI) into a Spark {@link StructType}.
+ *
+ * <p>Done directly rather than through Spark's {@code ArrowUtils} so the connector depends only on
+ * our Arrow version, never Spark's bundled one. Covers the primitive types the row reader produces;
+ * unsupported types fail fast.
+ */
+ final class SchemaConverter {
+
+   private SchemaConverter() {}
+
+   /**
+    * Builds a Spark struct with one field per Arrow field, keeping order, name and nullability.
+    *
+    * @param arrowSchema the Arrow schema to convert.
+    * @return the equivalent Spark schema.
+    * @throws IllegalArgumentException if any field has a type that {@code toSparkType} rejects.
+    */
+   static StructType toSparkSchema(Schema arrowSchema) {
+     StructType struct = new StructType();
+     for (Field field : arrowSchema.getFields()) {
+       // StructType.add returns a new struct, so the result has to be reassigned.
+       struct = struct.add(field.getName(), toSparkType(field), field.isNullable());
+     }
+     return struct;
+   }
+
+   /**
+    * Maps one Arrow field to its Spark type.
+    *
+    * <p>Supported: signed 8/16/32/64-bit integers, single- and double-precision floats, {@code
+    * Utf8}/{@code LargeUtf8} strings, and booleans.
+    *
+    * @param field the Arrow field whose type is converted; its name is used in error messages.
+    * @return the matching Spark {@link DataType}.
+    * @throws IllegalArgumentException for every other Arrow type, including unsigned integers and
+    *     half-precision floats.
+    */
+   static DataType toSparkType(Field field) {
+     ArrowType type = field.getType();
+     if (type instanceof ArrowType.Int i) {
+       if (!i.getIsSigned()) {
+         throw unsupported(field);
+       }
+       return switch (i.getBitWidth()) {
+         case 8 -> DataTypes.ByteType;
+         case 16 -> DataTypes.ShortType;
+         case 32 -> DataTypes.IntegerType;
+         case 64 -> DataTypes.LongType;
+         default -> throw unsupported(field);
+       };
+     }
+     if (type instanceof ArrowType.FloatingPoint fp) {
+       FloatingPointPrecision precision = fp.getPrecision();
+       // HALF (float16) is neither a Spark float nor a double; reject it instead of silently
+       // labelling the column as a 32-bit float.
+       return switch (precision) {
+         case SINGLE -> DataTypes.FloatType;
+         case DOUBLE -> DataTypes.DoubleType;
+         default -> throw unsupported(field);
+       };
+     }
+     if (type instanceof ArrowType.Utf8 || type instanceof ArrowType.LargeUtf8) {
+       return DataTypes.StringType;
+     }
+     if (type instanceof ArrowType.Bool) {
+       return DataTypes.BooleanType;
+     }
+     throw unsupported(field);
+   }
+
+   /** Builds (does not throw) the exception naming the offending column and its Arrow type. */
+   private static IllegalArgumentException unsupported(Field field) {
+     return new IllegalArgumentException(
+         "unsupported Arrow type for column '" + field.getName() + "': " + field.getType());
+   }
+ }
diff --git a/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java b/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java
new file mode 100644
index 0000000..39be5c6
--- /dev/null
+++ b/spark/src/main/java/org/apache/datafusion/spark/SparkFilters.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.datafusion.protobuf.BinaryExprNode;
+import org.apache.datafusion.protobuf.IsNotNull;
+import org.apache.datafusion.protobuf.IsNull;
+import org.apache.datafusion.protobuf.LogicalExprNode;
+import org.apache.datafusion.protobuf.Not;
+import org.apache.spark.sql.sources.And;
+import org.apache.spark.sql.sources.EqualTo;
+import org.apache.spark.sql.sources.Filter;
+import org.apache.spark.sql.sources.GreaterThan;
+import org.apache.spark.sql.sources.GreaterThanOrEqual;
+import org.apache.spark.sql.sources.LessThan;
+import org.apache.spark.sql.sources.LessThanOrEqual;
+import org.apache.spark.sql.sources.Or;
+
+import datafusion_common.DatafusionCommon.Column;
+import datafusion_common.DatafusionCommon.ScalarValue;
+
+/**
+ * Translates Spark {@link Filter}s into serialized {@code datafusion.LogicalExprNode} bytes for
+ * filter pushdown.
+ *
+ * <p>Translates the comparison, boolean, and null predicates over primitive literals that map
+ * cleanly; anything else is reported as not pushed so Spark applies it itself. A translated filter
+ * is applied exactly by DataFusion (the scan core calls {@code DataFrame::filter}), so it is safe
+ * to treat it as fully handled.
+ */
+final class SparkFilters {
+
+ private SparkFilters() {}
+
+ /** Pushed filter bytes, and the filters Spark must still apply itself. */
+ record Result(List pushed, Filter[] pushedFilters, Filter[] postScan) {}
+
+ static Result split(Filter[] filters) {
+ List pushed = new ArrayList<>();
+ List pushedFilters = new ArrayList<>();
+ List postScan = new ArrayList<>();
+ for (Filter filter : filters) {
+ LogicalExprNode expr = translate(filter);
+ if (expr != null) {
+ pushed.add(expr.toByteArray());
+ pushedFilters.add(filter);
+ } else {
+ postScan.add(filter);
+ }
+ }
+ return new Result(
+ pushed, pushedFilters.toArray(new Filter[0]), postScan.toArray(new Filter[0]));
+ }
+
+ /** Translate a single filter, or return null if it cannot be expressed. */
+ private static LogicalExprNode translate(Filter filter) {
+ if (filter instanceof EqualTo f) {
+ return binary("Eq", f.attribute(), f.value());
+ }
+ if (filter instanceof GreaterThan f) {
+ return binary("Gt", f.attribute(), f.value());
+ }
+ if (filter instanceof GreaterThanOrEqual f) {
+ return binary("GtEq", f.attribute(), f.value());
+ }
+ if (filter instanceof LessThan f) {
+ return binary("Lt", f.attribute(), f.value());
+ }
+ if (filter instanceof LessThanOrEqual f) {
+ return binary("LtEq", f.attribute(), f.value());
+ }
+ if (filter instanceof org.apache.spark.sql.sources.IsNull f) {
+ return wrap(b -> b.setIsNullExpr(IsNull.newBuilder().setExpr(column(f.attribute()))));
+ }
+ if (filter instanceof org.apache.spark.sql.sources.IsNotNull f) {
+ return wrap(b -> b.setIsNotNullExpr(IsNotNull.newBuilder().setExpr(column(f.attribute()))));
+ }
+ if (filter instanceof And f) {
+ LogicalExprNode l = translate(f.left());
+ LogicalExprNode r = translate(f.right());
+ return (l == null || r == null) ? null : binaryNodes("And", l, r);
+ }
+ if (filter instanceof Or f) {
+ LogicalExprNode l = translate(f.left());
+ LogicalExprNode r = translate(f.right());
+ return (l == null || r == null) ? null : binaryNodes("Or", l, r);
+ }
+ if (filter instanceof org.apache.spark.sql.sources.Not f) {
+ LogicalExprNode child = translate(f.child());
+ return child == null ? null : wrap(b -> b.setNotExpr(Not.newBuilder().setExpr(child)));
+ }
+ return null;
+ }
+
+ private static LogicalExprNode binary(String op, String attribute, Object value) {
+ ScalarValue literal = scalar(value);
+ if (literal == null) {
+ return null;
+ }
+ return binaryNodes(
+ op, column(attribute), LogicalExprNode.newBuilder().setLiteral(literal).build());
+ }
+
+ private static LogicalExprNode binaryNodes(
+ String op, LogicalExprNode left, LogicalExprNode right) {
+ return LogicalExprNode.newBuilder()
+ .setBinaryExpr(BinaryExprNode.newBuilder().addOperands(left).addOperands(right).setOp(op))
+ .build();
+ }
+
+ private static LogicalExprNode column(String attribute) {
+ return LogicalExprNode.newBuilder().setColumn(Column.newBuilder().setName(attribute)).build();
+ }
+
+ private interface ExprFiller {
+ LogicalExprNode.Builder apply(LogicalExprNode.Builder builder);
+ }
+
+ private static LogicalExprNode wrap(ExprFiller filler) {
+ return filler.apply(LogicalExprNode.newBuilder()).build();
+ }
+
+ /** Map a Spark literal to a DataFusion ScalarValue, or null if unsupported. */
+ private static ScalarValue scalar(Object value) {
+ if (value instanceof Long v) {
+ return ScalarValue.newBuilder().setInt64Value(v).build();
+ }
+ if (value instanceof Integer v) {
+ return ScalarValue.newBuilder().setInt32Value(v).build();
+ }
+ if (value instanceof Double v) {
+ return ScalarValue.newBuilder().setFloat64Value(v).build();
+ }
+ if (value instanceof Float v) {
+ return ScalarValue.newBuilder().setFloat32Value(v).build();
+ }
+ if (value instanceof Boolean v) {
+ return ScalarValue.newBuilder().setBoolValue(v).build();
+ }
+ if (value instanceof String v) {
+ return ScalarValue.newBuilder().setUtf8Value(v).build();
+ }
+ return null;
+ }
+}
diff --git a/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
new file mode 100644
index 0000000..fd603b1
--- /dev/null
+++ b/spark/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -0,0 +1 @@
+org.apache.datafusion.spark.DatafusionTableProvider
diff --git a/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java b/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java
new file mode 100644
index 0000000..b1695ba
--- /dev/null
+++ b/spark/src/test/java/org/apache/datafusion/spark/DatafusionScanBuilderTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.List;
+
+import org.apache.datafusion.protobuf.ScanRequest;
+import org.apache.spark.sql.connector.read.SupportsPushDownFilters;
+import org.apache.spark.sql.sources.Filter;
+import org.apache.spark.sql.sources.GreaterThanOrEqual;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.StructType;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit-level proof that the scan builder encodes pushdown into the ScanRequest, isolated from
+ * Spark's own limit/filter handling (which would mask whether we pushed anything).
+ */
+ class DatafusionScanBuilderTest {
+
+   /** Two-column table schema every test builds its scan against. */
+   private static final StructType SCHEMA =
+       new StructType().add("id", DataTypes.LongType).add("name", DataTypes.StringType);
+
+   /** Fresh builder over {@link #SCHEMA} with an empty provider config. */
+   private DatafusionScanBuilder newScanBuilder() {
+     byte[] emptyConfig = new byte[0];
+     return new DatafusionScanBuilder(SCHEMA, "datafusion.listing", emptyConfig);
+   }
+
+   /** Parses the ScanRequest bytes carried by a built scan. */
+   private static ScanRequest requestOf(org.apache.spark.sql.connector.read.Scan scan)
+       throws Exception {
+     DatafusionScanImpl impl = (DatafusionScanImpl) scan;
+     return ScanRequest.parseFrom(impl.scanRequestBytes());
+   }
+
+   @Test
+   void pushesLimit() throws Exception {
+     DatafusionScanBuilder scanBuilder = newScanBuilder();
+     boolean fullyPushed = scanBuilder.pushLimit(7);
+     assertTrue(fullyPushed, "limit should be reported as fully pushed");
+
+     ScanRequest encoded = requestOf(scanBuilder.build());
+     assertTrue(encoded.hasLimit());
+     assertEquals(7L, encoded.getLimit());
+   }
+
+   @Test
+   void noLimitWhenNotPushed() throws Exception {
+     DatafusionScanBuilder scanBuilder = newScanBuilder();
+     ScanRequest encoded = requestOf(scanBuilder.build());
+     assertFalse(encoded.hasLimit(), "limit must be unset when Spark pushes none");
+   }
+
+   @Test
+   void pushesProjection() throws Exception {
+     StructType nameOnly = new StructType().add("name", DataTypes.StringType);
+     DatafusionScanBuilder scanBuilder = newScanBuilder();
+     scanBuilder.pruneColumns(nameOnly);
+
+     ScanRequest encoded = requestOf(scanBuilder.build());
+     assertEquals(List.of("name"), encoded.getProjectionList());
+   }
+
+   @Test
+   void pushesComparisonFilter() throws Exception {
+     DatafusionScanBuilder scanBuilder = newScanBuilder();
+     SupportsPushDownFilters pushdown = (SupportsPushDownFilters) scanBuilder;
+     Filter[] offered = new Filter[] {new GreaterThanOrEqual("id", 2L)};
+
+     Filter[] leftForSpark = pushdown.pushFilters(offered);
+     assertEquals(0, leftForSpark.length, "a translatable filter should be fully pushed");
+
+     ScanRequest encoded = requestOf(scanBuilder.build());
+     assertEquals(1, encoded.getFiltersCount());
+   }
+ }
diff --git a/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java b/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java
new file mode 100644
index 0000000..4165921
--- /dev/null
+++ b/spark/src/test/java/org/apache/datafusion/spark/DatafusionSourceTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datafusion.spark;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.functions;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+/**
+ * End-to-end test of the {@code datafusion} Spark data source against a local SparkSession: the
+ * connector reads a CSV through the DataFusion listing provider and the plain-C scan ABI, all the
+ * way back to Spark rows. Covers schema inference, full scan, projection, and filter pushdown.
+ */
+ class DatafusionSourceTest {
+
+   /** Local session shared by all tests; started once and stopped after the last test. */
+   private static SparkSession spark;
+
+   @TempDir static Path tmp;
+
+   @BeforeAll
+   static void startSpark() {
+     spark =
+         SparkSession.builder()
+             .master("local[2]")
+             .appName("datafusion-source-test")
+             .config("spark.ui.enabled", "false")
+             .config("spark.sql.shuffle.partitions", "2")
+             .getOrCreate();
+   }
+
+   @AfterAll
+   static void stopSpark() {
+     if (spark != null) {
+       spark.stop();
+     }
+   }
+
+   /**
+    * Writes the three-row fixture ({@code id,name} with ids 1..3) into the temp directory and
+    * loads it through the {@code datafusion} source as csv.
+    */
+   private Dataset<Row> read() throws Exception {
+     Path csv = tmp.resolve("data.csv");
+     Files.writeString(csv, "id,name\n1,a\n2,b\n3,c\n");
+     return spark
+         .read()
+         .format("datafusion")
+         .option("path", csv.toString())
+         .option("format", "csv")
+         .load();
+   }
+
+   @Test
+   void inferredSchema() throws Exception {
+     List<String> columns = Arrays.asList(read().schema().fieldNames());
+     assertEquals(List.of("id", "name"), columns);
+   }
+
+   @Test
+   void fullScanReturnsAllRows() throws Exception {
+     assertEquals(3, read().count());
+   }
+
+   @Test
+   void projectionSelectsColumns() throws Exception {
+     Dataset<Row> names = read().select("name");
+     assertEquals(List.of("name"), Arrays.asList(names.schema().fieldNames()));
+     assertEquals(3, names.count());
+   }
+
+   @Test
+   void limitPushdownCapsRows() throws Exception {
+     assertEquals(2, read().limit(2).count());
+   }
+
+   @Test
+   void filterPushdownReducesRows() throws Exception {
+     Dataset<Row> filtered = read().filter(functions.col("id").geq(2));
+     assertEquals(2, filtered.count());
+
+     // The element type is needed so the lambda and method reference below type-check.
+     List<Long> ids =
+         filtered.select("id").as(org.apache.spark.sql.Encoders.LONG()).collectAsList();
+     assertTrue(ids.stream().allMatch(id -> id >= 2), "all surviving ids should be >= 2");
+     assertEquals(2L + 3L, ids.stream().mapToLong(Long::longValue).sum());
+   }
+ }