1

In the C Data Interface page of Arrow Java Document, the Java code allocated some memory, and the C++ code fill the allocated memory. While it seems arrow-rs works in a different way, it fills the data first and then pass the pointer to a foreign language. So I can't just reuse the Java part of C++/Java communication naively.

In the rust part I return the addresses of FFI_ArrowArray and FFI_ArrowSchema use pointer as i64 because JNI can't recognize a pointer.

pub fn array_example() -> Int32Array
{
    let array = Int32Array::from(vec![Some(1), None, Some(3)]);
    assert_eq!(array.len(), 3);
    assert_eq!(array.value(0), 1);
    assert_eq!(array.is_null(1), true);

    let collected: Vec<_> = array.iter().collect();
    assert_eq!(collected, vec![Some(1), None, Some(3)]);
    assert_eq!(array.values(), [1, 0, 3]);
    array
}

pub fn export_array_example() -> [i64; 2] {
    // Export it
    let array = array_example();
    let data = array.data();
    let out_array = FFI_ArrowArray::new(&data);
    let out_schema = FFI_ArrowSchema::try_from(data.data_type()).unwrap();

    // Use ManuallyDrop to avoid Box:Drop recursing
    let schema = Box::new(ManuallyDrop::new(out_schema));
    let array = Box::new(ManuallyDrop::new(out_array));

    let schema_ptr = &**schema as *const _;
    let array_ptr = &**array as *const _;
    let schema_addr = schema_ptr as i64;
    let array_addr = array_ptr as i64;

    let schema_addr2 = schema_addr as *const FFI_ArrowSchema;
    let array_addr2 = array_addr as *const FFI_ArrowArray;
    let array = unsafe {
        ArrowArray::new(std::ptr::read(array_addr2), std::ptr::read(schema_addr2))
    };

    let array = Int32Array::from(ArrayData::try_from(array).unwrap());
    println!("pointer schema as long: {}", schema_addr);
    println!("pointer array as long: {}", array_addr);
    println!("recovered arr: {:?}",array);

    [schema_addr, array_addr]
    //https://docs.rs/arrow/33.0.0/arrow/ffi/index.html
    //https://arrow.apache.org/docs/java/cdata.html#java-to-c
    //https://github.com/apache/arrow-rs/blob/3761ac53cab55c269b06d9a13825dd81b03e0c11/arrow/src/ffi.rs#L579-L580
}

In the Java part I use ArrowSchema.wrap and ArrowArray.wrap

public class Converter {

    static {
        System.loadLibrary("lance_jni");
    }

    public static void getInt32ArrayExample() {
        BufferAllocator allocator = new RootAllocator();
        ConverterJni.getInt32Arr();
        long[] arr = ConverterJni.getInt32Arr();
        System.out.println(arr[0]);
        System.out.println(arr[1]);
        ArrowSchema arrowSchema = ArrowSchema.wrap(arr[0]);
        ArrowArray array = ArrowArray.wrap(arr[1]);
        var vec = Data.importVector(allocator, array, arrowSchema, null);
    }
}

Eventually I get

[info] #
[info] # A fatal error has been detected by the Java Runtime Environment:
[info] #
[info] #  SIGSEGV (0xb) at pc=0x00000001127708e7, pid=47483, tid=5891
[info] #
[info] # JRE version: OpenJDK Runtime Environment Homebrew (11.0.12) (build 11.0.12+0)
[info] # Java VM: OpenJDK 64-Bit Server VM Homebrew (11.0.12+0, mixed mode, tiered, compressed oops, g1 gc, bsd-amd64)
[info] # Problematic frame:
[info] # J 414 c2 java.nio.DirectByteBuffer.get()B java.base@11.0.12 (28 bytes) @ 0x00000001127708e7 [0x00000001127708a0+0x0000000000000047]

Full example at github

Renkai
  • 1,991
  • 2
  • 13
  • 18

0 Answers0