1

I followed the Maven instructions to include arrow-dataset in my pom.xml. However, when running the code, it complained that arrow-dataset-jni.dll was not found.

How can I create or install the DLL?

Thank you :)

Jac
  • 29
  • 2

2 Answers2

1

Based on the master branch, DLL support is already implemented. It will be available in the next release.

If you would like to use DLL support right now, you have two options:

  1. Build the master Java branch project locally (java/dataset module)
  2. Use current nightly packages repository

Then, steps could be:

1.- Download the new Dataset / C Data jars (and their POM files) locally from the nightly packages repository mentioned above

2.- Install new jar Dataset / C Data locally:

# Install the downloaded arrow-dataset snapshot (POM first, then JAR) into the local Maven repository
mvn install:install-file -Dfile="C:\Users\dsusanibar\IdeaProjects\win-cookbooks\src\main\resources\files\arrow-dataset-10.0.0-SNAPSHOT.pom" -DgroupId="org.apache.arrow" -DartifactId="arrow-dataset" -Dversion="10.0.0-SNAPSHOT" -Dpackaging="pom"
mvn install:install-file -Dfile="C:\Users\dsusanibar\IdeaProjects\win-cookbooks\src\main\resources\files\arrow-dataset-10.0.0-SNAPSHOT.jar" -DgroupId="org.apache.arrow" -DartifactId="arrow-dataset" -Dversion="10.0.0-SNAPSHOT" -Dpackaging="jar"
# Install the downloaded arrow-c-data (C Data Interface) snapshot the same way
mvn install:install-file -Dfile="C:\Users\dsusanibar\IdeaProjects\win-cookbooks\src\main\resources\files\arrow-c-data-10.0.0-SNAPSHOT.pom" -DgroupId="org.apache.arrow" -DartifactId="arrow-c-data" -Dversion="10.0.0-SNAPSHOT" -Dpackaging="pom"
mvn install:install-file -Dfile="C:\Users\dsusanibar\IdeaProjects\win-cookbooks\src\main\resources\files\arrow-c-data-10.0.0-SNAPSHOT.jar" -DgroupId="org.apache.arrow" -DartifactId="arrow-c-data" -Dversion="10.0.0-SNAPSHOT" -Dpackaging="jar"

3.- Add new Dataset / C Data Interface dependencies into your project (Maven/Gradle)

<?xml version="1.0" encoding="UTF-8"?>
<!-- Example project that consumes arrow-dataset and arrow-c-data at 10.0.0-SNAPSHOT next to other Arrow modules at ${arrow.version}. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>win-cookbooks</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Version shared by the arrow-memory-netty, arrow-memory-core, arrow-format and arrow-vector dependencies below -->
        <arrow.version>9.0.0</arrow.version>
    </properties>

    <dependencies>
        <!-- Pinned to the snapshot version instead of ${arrow.version} -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-dataset</artifactId>
            <version>10.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-memory-netty</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <!-- Pinned to the snapshot version instead of ${arrow.version} -->
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-c-data</artifactId>
            <version>10.0.0-SNAPSHOT</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-memory-core</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-format</artifactId>
            <version>${arrow.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.arrow</groupId>
            <artifactId>arrow-vector</artifactId>
            <version>${arrow.version}</version>
        </dependency>
    </dependencies>
</project>

4.- Create a Dataset with the new Dataset jar, which contains the DLL arrow_dataset_jni.dll, and read RecordBatches with the new C Data Interface jar, which contains the DLL arrow_cdata_jni.dll:

import org.apache.arrow.dataset.file.FileFormat;
import org.apache.arrow.dataset.file.FileSystemDatasetFactory;
import org.apache.arrow.dataset.jni.NativeMemoryPool;
import org.apache.arrow.dataset.scanner.ScanOptions;
import org.apache.arrow.dataset.scanner.Scanner;
import org.apache.arrow.dataset.source.Dataset;
import org.apache.arrow.dataset.source.DatasetFactory;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowReader;

import java.io.IOException;
import java.net.URISyntaxException;

/**
 * Minimal example that opens a Parquet file through the Arrow Dataset API,
 * scans it in batches and prints each record batch as tab-separated text.
 */
public class Recipe {
    /**
     * Scans the Parquet file at a hard-coded {@code file:///} URI and prints, for every
     * record batch, its running number, its row count and its content.
     *
     * <p>Failures while opening or reading the dataset are printed to standard error
     * rather than propagated.
     *
     * @param args command-line arguments; not used
     * @throws URISyntaxException kept in the signature so existing callers keep compiling
     */
    public static void main(String[] args) throws URISyntaxException {
        // File at: https://github.com/apache/arrow-cookbook/blob/main/java/thirdpartydeps/parquetfiles/data1.parquet
        String uri = "file:///C:\\Users\\dsusanibar\\IdeaProjects\\win-cookbooks\\src\\main\\resources\\files\\data1.parquet";
        ScanOptions options = new ScanOptions(/*batchSize*/ 5);
        // Running batch number. It lives outside the read loop (and outside the lambda) so it
        // keeps increasing from batch to batch; a one-element array lets the lambda mutate it.
        final int[] count = {1};
        try (
            BufferAllocator allocator = new RootAllocator();
            DatasetFactory datasetFactory = new FileSystemDatasetFactory(allocator, NativeMemoryPool.getDefault(), FileFormat.PARQUET, uri);
            Dataset dataset = datasetFactory.finish();
            Scanner scanner = dataset.newScan(options)
        ) {
            scanner.scan().forEach(scanTask -> {
                try (ArrowReader reader = scanTask.execute()) {
                    while (reader.loadNextBatch()) {
                        // The root belongs to the reader, which refills it on every
                        // loadNextBatch() and releases it when the reader is closed,
                        // so it must not be closed here per batch.
                        VectorSchemaRoot root = reader.getVectorSchemaRoot();
                        System.out.println("Number of rows per batch["+ count[0]++ +"]: " + root.getRowCount());
                        System.out.println(root.contentToTSVString());
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
            });
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Result:
Number of rows per batch[1]: 3
id  name
1   David
2   Gladis
3   Juan

Please let us know if this works on your side.

0

I had the same issue. I solved it by updating arrow.version to 11.0.0 and copying the whole x86_64 package of arrow-c-data into my resources; after that, the test ran correctly. While debugging, I found that the issue occurs when the JNI library is loaded:

try (final InputStream is = JniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) {

So I copied the package into my resources.

My project structure looks like this:

.
├── java
│   └── com
│       └── proinnova
│           ├── mv
│           │   └── DuckDbTest.java
└── resources
    ├── application.properties
    ├── logback-test.xml
    └── x86_64
        ├── arrow_cdata_jni.dll
        ├── libarrow_cdata_jni.dylib
        └── libarrow_cdata_jni.so
steven
  • 377
  • 3
  • 14