GH-39979: [Python] Low-level bindings for exporting/importing the C Device Interface #39980

Merged (11 commits), Feb 28, 2024
cpp/src/arrow/c/bridge.cc (31 changes: 30 additions & 1 deletion)

@@ -587,7 +587,8 @@ struct ArrayExporter {
       export_.buffers_.resize(n_buffers);
       std::transform(buffers_begin, data->buffers.end(), export_.buffers_.begin(),
                      [](const std::shared_ptr<Buffer>& buffer) -> const void* {
-                       return buffer ? buffer->data() : nullptr;
+                       return buffer ? reinterpret_cast<const void*>(buffer->address())
+                                     : nullptr;
                      });

if (need_variadic_buffer_sizes) {
@@ -1977,6 +1978,24 @@ Result<std::shared_ptr<Array>> ImportDeviceArray(struct ArrowDeviceArray* array,
  return ImportDeviceArray(array, *maybe_type, mapper);
}

Result<std::shared_ptr<MemoryManager>> DefaultDeviceMapper(ArrowDeviceType device_type,
                                                           int64_t device_id) {
  if (device_type != ARROW_DEVICE_CPU) {
    return Status::NotImplemented("Only importing data on CPU is supported");
  }
@jorisvandenbossche (Member, author): This could later be expanded to also allow the CUDA device for CUDA-enabled builds.

@pitrou (Member), Feb 8, 2024: Yes, there probably should be some kind of registry so that "default" device mappers can be added separately.

@jorisvandenbossche (Member, author): @pitrou you mention a "registry", but AFAIK that's what we ideally would have (so external device implementations could register themselves) and that doesn't exist yet, right?
In that case, is the function above an OK short-term default?

Member: Yes and yes!

  return default_cpu_memory_manager();
}

Result<std::shared_ptr<Array>> ImportDeviceArray(struct ArrowDeviceArray* array,
                                                 std::shared_ptr<DataType> type) {
  return ImportDeviceArray(array, type, DefaultDeviceMapper);
}
@jorisvandenbossche (Member, author): Do we want to provide such an API that uses a default DeviceMapper?

With the current APIs here, I assume the idea is that it's the responsibility of the user (i.e. the library or application using Arrow C++ to consume data through the C Device interface) to provide the device mapping as they see fit.
In the case of exposing this in pyarrow, it's pyarrow that is the user of those APIs, and I think pyarrow certainly wants a default mapping to be provided (not specified by the user of pyarrow). In theory I could write this DefaultDeviceMapper function in Cython to keep this on the pyarrow side, but it might also be useful for other users of the C++ APIs?

(I suppose that when we add a default in C++, I could also give the existing signatures a default parameter value for mapper, instead of adding those two additional signatures.)

cc @zeroshade @pitrou

Member:
> Do we want to provide such an API that uses a default DeviceMapper?

Yes, that sounds reasonable to me. I think that in many (most?) cases, users will want to use whatever device mapper is registered for the given device type.

Member: Also:
> I could also give the existing signatures a default parameter value for mapper, instead of adding those two additional signatures

Yes, that would reduce the proliferation of different functions. You could simply have something like `const DeviceMemoryMapper& mapper = {}`.

Member: The difficulty with providing a default device mapper here is that it creates a circular dependency, due to ArrowDeviceType being defined in abi.h, and requires linking against libarrow_cuda.so.

@jorisvandenbossche (Member, author): That might be a reason to keep this default on the pyarrow side? (We can implement the mapper function in C++, but only provide it as the default argument on the Python side.)

In Python, we can more easily check dynamically whether the pyarrow.cuda module is available, and if so provide a different default mapper (one that includes GPU devices).
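A hypothetical sketch of what such a pyarrow-side registry and default could look like. None of these names exist in pyarrow or in this PR, and the returned strings stand in for real memory-manager objects; this only illustrates the idea discussed above:

```python
# Hypothetical sketch: a registry of device mappers keyed by the
# ArrowDeviceType constants from the C Device interface spec.
ARROW_DEVICE_CPU = 1
ARROW_DEVICE_CUDA = 2

_device_mappers = {}


def register_device_mapper(device_type, mapper):
    """Register a callable (device_type, device_id) -> memory manager."""
    _device_mappers[device_type] = mapper


def default_device_mapper(device_type, device_id):
    """Look up the mapper registered for the device the data lives on."""
    mapper = _device_mappers.get(device_type)
    if mapper is None:
        raise NotImplementedError(
            f"no device mapper registered for device type {device_type}")
    return mapper(device_type, device_id)


# CPU support is always available; CUDA would only be registered
# when the optional pyarrow.cuda module is importable.
register_device_mapper(ARROW_DEVICE_CPU, lambda dt, di: "<CPU memory manager>")
try:
    import pyarrow.cuda  # noqa: F401
    register_device_mapper(ARROW_DEVICE_CUDA, lambda dt, di: "<CUDA memory manager>")
except ImportError:
    pass
```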


Result<std::shared_ptr<Array>> ImportDeviceArray(struct ArrowDeviceArray* array,
                                                 struct ArrowSchema* type) {
  return ImportDeviceArray(array, type, DefaultDeviceMapper);
}

Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema,
    const DeviceMemoryMapper& mapper) {
@@ -1997,6 +2016,16 @@ Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
  return ImportDeviceRecordBatch(array, *maybe_schema, mapper);
}

Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema) {
  return ImportDeviceRecordBatch(array, schema, DefaultDeviceMapper);
}

Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, struct ArrowSchema* schema) {
  return ImportDeviceRecordBatch(array, schema, DefaultDeviceMapper);
}

//////////////////////////////////////////////////////////////////////////
// C stream export

cpp/src/arrow/c/bridge.h (14 changes: 14 additions & 0 deletions)

@@ -249,6 +249,13 @@ Result<std::shared_ptr<Array>> ImportDeviceArray(struct ArrowDeviceArray* array,
                                                 struct ArrowSchema* type,
                                                 const DeviceMemoryMapper& mapper);

ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportDeviceArray(struct ArrowDeviceArray* array,
                                                 std::shared_ptr<DataType> type);
ARROW_EXPORT
Result<std::shared_ptr<Array>> ImportDeviceArray(struct ArrowDeviceArray* array,
                                                 struct ArrowSchema* type);

/// \brief EXPERIMENTAL: Import C++ record batch with buffers on a device from the C data
/// interface.
///
@@ -285,6 +292,13 @@ Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, struct ArrowSchema* schema,
    const DeviceMemoryMapper& mapper);

ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema);
ARROW_EXPORT
Result<std::shared_ptr<RecordBatch>> ImportDeviceRecordBatch(
    struct ArrowDeviceArray* array, struct ArrowSchema* schema);

/// @}

/// \defgroup c-stream-interface Functions for working with the C data interface.
python/pyarrow/array.pxi (64 changes: 64 additions & 0 deletions)

@@ -1778,6 +1778,70 @@ cdef class Array(_PandasConvertible):

        return pyarrow_wrap_array(array)

    def _export_to_c_device(self, out_ptr, out_schema_ptr=0):
Member: out_schema_ptr=None would feel slightly more Pythonic IMHO, though that's debatable.

@jorisvandenbossche (Member, author): I would propose to leave this as is, to keep it consistent with the other _export_to_c definitions (and the _as_c_pointer helper also requires an integer at the moment).

"""
Export to a C ArrowDeviceArray struct, given its pointer.

If a C ArrowSchema struct pointer is also given, the array type
is exported to it at the same time.

Parameters
----------
out_ptr: int
The raw pointer to a C ArrowDeviceArray struct.
out_schema_ptr: int (optional)
The raw pointer to a C ArrowSchema struct.

Be careful: if you don't pass the ArrowDeviceArray struct to a consumer,
array memory will leak. This is a low-level function intended for
expert users.
Comment on lines +1795 to +1797. Member: Should this explicitly mention the release callback on the struct?

@jorisvandenbossche (Member, author): I copied this from the existing docstrings. We could mention the release callback explicitly, but essentially you are then a "consumer". This function returns an integer; you can't call the release callback on the return value as such. Only when you actually interpret it as an ArrowArray struct can you do that (and at that point, you are a consumer who should be aware of those details?).
I could also point to the general page about the C Data Interface.

Member: I agree with @jorisvandenbossche that the release callback need not be mentioned here. This is all in the spec.
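For completeness, a rough sketch of what manual release through pyarrow.cffi could look like for a producer that never hands the struct off (the release callback lives on the embedded ArrowArray, per the C Data Interface spec; `inner` is an illustrative name):

```python
from pyarrow.cffi import ffi

c_array = ffi.new("struct ArrowDeviceArray*")
# ... assume a producer filled this struct, e.g. via _export_to_c_device ...
inner = ffi.addressof(c_array[0], "array")
if inner.release != ffi.NULL:
    inner.release(inner)  # frees the exported data, per the spec's contract
```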

"""
cdef:
void* c_ptr = _as_c_pointer(out_ptr)
void* c_schema_ptr = _as_c_pointer(out_schema_ptr,
allow_null=True)
with nogil:
check_status(ExportDeviceArray(
deref(self.sp_array), <shared_ptr[CSyncEvent]>NULL,
<ArrowDeviceArray*> c_ptr, <ArrowSchema*> c_schema_ptr))

    @staticmethod
    def _import_from_c_device(in_ptr, type):
        """
        Import Array from a C ArrowDeviceArray struct, given its pointer
        and the imported array type.

        Parameters
        ----------
        in_ptr: int
            The raw pointer to a C ArrowDeviceArray struct.
        type: DataType or int
            Either a DataType object, or the raw pointer to a C ArrowSchema
            struct.

        This is a low-level function intended for expert users.
        """
        cdef:
            void* c_ptr = _as_c_pointer(in_ptr)
            void* c_type_ptr
            shared_ptr[CArray] c_array

        c_type = pyarrow_unwrap_data_type(type)
        if c_type == nullptr:
            # Not a DataType object, perhaps a raw ArrowSchema pointer
            c_type_ptr = _as_c_pointer(type)
            with nogil:
                c_array = GetResultValue(
                    ImportDeviceArray(<ArrowDeviceArray*> c_ptr,
                                      <ArrowSchema*> c_type_ptr)
                )
Comment on lines +1834 to +1837. Member: The default mapper only allows CPU arrays, but pyarrow does have a cuda lib; shouldn't we allow and enable importing at least CUDA arrays too?

@jorisvandenbossche (Member, author): Certainly, but as mentioned earlier (#39980 (comment)), I was planning to tackle CUDA in a follow-up, and this PR indeed only properly supports and tests CPU.

        else:
            with nogil:
                c_array = GetResultValue(
                    ImportDeviceArray(<ArrowDeviceArray*> c_ptr, c_type)
                )
        return pyarrow_wrap_array(c_array)
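For context, the calling pattern for these low-level methods (the same pattern exercised by the tests added below) looks roughly like this, assuming a CPU-resident array:

```python
import pyarrow as pa
from pyarrow.cffi import ffi

# Allocate an empty ArrowDeviceArray struct and pass its address as a plain int.
c_array = ffi.new("struct ArrowDeviceArray*")
ptr_array = int(ffi.cast("uintptr_t", c_array))

arr = pa.array([1, 2, 3], type=pa.int32())
arr._export_to_c_device(ptr_array)

# The struct now owns the exported data; importing moves ownership back into pyarrow.
arr_new = pa.Array._import_from_c_device(ptr_array, pa.int32())
assert arr_new.to_pylist() == [1, 2, 3]
```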

    def __dlpack__(self, stream=None):
        """Export a primitive array as a DLPack capsule.

python/pyarrow/cffi.py (10 changes: 10 additions & 0 deletions)

@@ -64,6 +64,16 @@
// Opaque producer-specific data
void* private_data;
};

typedef int32_t ArrowDeviceType;
Member: should we expose the constants in pyarrow somehow?

@jorisvandenbossche (Member, author): If we don't use them ourselves, I don't know if that is needed (although it might still be useful for other users of pyarrow.cffi?)


struct ArrowDeviceArray {
  struct ArrowArray array;
  int64_t device_id;
  ArrowDeviceType device_type;
  void* sync_event;
  int64_t reserved[3];
};
"""

# TODO use out-of-line mode for faster import and avoid C parsing
python/pyarrow/includes/libarrow.pxd (22 changes: 22 additions & 0 deletions)

@@ -343,6 +343,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
    CResult[unique_ptr[CResizableBuffer]] AllocateResizableBuffer(
        const int64_t size, CMemoryPool* pool)

    cdef cppclass CSyncEvent" arrow::Device::SyncEvent":
        pass

    cdef cppclass CDevice" arrow::Device":
        pass

    cdef CMemoryPool* c_default_memory_pool" arrow::default_memory_pool"()
    cdef CMemoryPool* c_system_memory_pool" arrow::system_memory_pool"()
    cdef CStatus c_jemalloc_memory_pool" arrow::jemalloc_memory_pool"(
@@ -2811,6 +2817,9 @@ cdef extern from "arrow/c/abi.h":
    cdef struct ArrowArrayStream:
        void (*release)(ArrowArrayStream*) noexcept nogil

    cdef struct ArrowDeviceArray:
        pass

cdef extern from "arrow/c/bridge.h" namespace "arrow" nogil:
CStatus ExportType(CDataType&, ArrowSchema* out)
CResult[shared_ptr[CDataType]] ImportType(ArrowSchema*)
@@ -2840,6 +2849,19 @@ cdef extern from "arrow/c/bridge.h" namespace "arrow" nogil:
    CResult[shared_ptr[CRecordBatchReader]] ImportRecordBatchReader(
        ArrowArrayStream*)

    CStatus ExportDeviceArray(const CArray&, shared_ptr[CSyncEvent],
                              ArrowDeviceArray* out, ArrowSchema*)
    CResult[shared_ptr[CArray]] ImportDeviceArray(
        ArrowDeviceArray*, shared_ptr[CDataType])
    CResult[shared_ptr[CArray]] ImportDeviceArray(
        ArrowDeviceArray*, ArrowSchema*)

    CStatus ExportDeviceRecordBatch(const CRecordBatch&, shared_ptr[CSyncEvent],
                                    ArrowDeviceArray* out, ArrowSchema*)
    CResult[shared_ptr[CRecordBatch]] ImportDeviceRecordBatch(
        ArrowDeviceArray*, shared_ptr[CSchema])
    CResult[shared_ptr[CRecordBatch]] ImportDeviceRecordBatch(
        ArrowDeviceArray*, ArrowSchema*)

cdef extern from "arrow/util/byte_size.h" namespace "arrow::util" nogil:
CResult[int64_t] ReferencedBufferSize(const CArray& array_data)
python/pyarrow/table.pxi (62 changes: 62 additions & 0 deletions)

@@ -3084,6 +3084,68 @@ cdef class RecordBatch(_Tabular):

        return pyarrow_wrap_batch(c_batch)

    def _export_to_c_device(self, out_ptr, out_schema_ptr=0):
        """
        Export to a C ArrowDeviceArray struct, given its pointer.

        If a C ArrowSchema struct pointer is also given, the record batch
        schema is exported to it at the same time.

        Parameters
        ----------
        out_ptr: int
            The raw pointer to a C ArrowDeviceArray struct.
        out_schema_ptr: int (optional)
            The raw pointer to a C ArrowSchema struct.

        Be careful: if you don't pass the ArrowDeviceArray struct to a consumer,
        array memory will leak. This is a low-level function intended for
        expert users.
        """
        cdef:
            void* c_ptr = _as_c_pointer(out_ptr)
            void* c_schema_ptr = _as_c_pointer(out_schema_ptr,
                                               allow_null=True)
        with nogil:
            check_status(ExportDeviceRecordBatch(
                deref(self.sp_batch), <shared_ptr[CSyncEvent]>NULL,
                <ArrowDeviceArray*> c_ptr, <ArrowSchema*> c_schema_ptr)
            )

    @staticmethod
    def _import_from_c_device(in_ptr, schema):
        """
        Import RecordBatch from a C ArrowDeviceArray struct, given its pointer
        and the imported schema.

        Parameters
        ----------
        in_ptr: int
            The raw pointer to a C ArrowDeviceArray struct.
        schema: Schema or int
            Either a Schema object, or the raw pointer to a C ArrowSchema
            struct.

        This is a low-level function intended for expert users.
        """
        cdef:
            void* c_ptr = _as_c_pointer(in_ptr)
            void* c_schema_ptr
            shared_ptr[CRecordBatch] c_batch

        c_schema = pyarrow_unwrap_schema(schema)
        if c_schema == nullptr:
            # Not a Schema object, perhaps a raw ArrowSchema pointer
            c_schema_ptr = _as_c_pointer(schema, allow_null=True)
            with nogil:
                c_batch = GetResultValue(ImportDeviceRecordBatch(
                    <ArrowDeviceArray*> c_ptr, <ArrowSchema*> c_schema_ptr))
Comment on lines +3202 to +3203. Member: Same comment as before: don't we want to allow using the pyarrow.cuda lib to provide a device mapper and allow handling CUDA-based GPU memory arrays?

        else:
            with nogil:
                c_batch = GetResultValue(ImportDeviceRecordBatch(
                    <ArrowDeviceArray*> c_ptr, c_schema))
        return pyarrow_wrap_batch(c_batch)
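Analogously to Array above, a minimal usage sketch for the RecordBatch variants, exporting the device-array struct and the schema struct together:

```python
import pyarrow as pa
from pyarrow.cffi import ffi

c_schema = ffi.new("struct ArrowSchema*")
ptr_schema = int(ffi.cast("uintptr_t", c_schema))
c_array = ffi.new("struct ArrowDeviceArray*")
ptr_array = int(ffi.cast("uintptr_t", c_array))

batch = pa.RecordBatch.from_pydict({"a": [1, 2], "b": ["x", "y"]})
batch._export_to_c_device(ptr_array, ptr_schema)

# Re-import, reading the schema from the exported ArrowSchema struct.
batch_new = pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)
assert batch_new.to_pydict() == {"a": [1, 2], "b": ["x", "y"]}
```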


def _reconstruct_record_batch(columns, schema):
"""
python/pyarrow/tests/test_cffi.py (112 changes: 112 additions & 0 deletions)

@@ -601,3 +601,115 @@ def test_roundtrip_batch_reader_capsule():
    assert imported_reader.read_next_batch().equals(batch)
    with pytest.raises(StopIteration):
        imported_reader.read_next_batch()


@needs_cffi
def test_export_import_device_array():
Member: We're copy-pasting a lot of code in those tests; can we try to reduce duplication by factoring common functionality out?

@jorisvandenbossche (Member, author): I made an attempt to refactor this. In any case, it's adding less code now ;)

    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new("struct ArrowDeviceArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    # Type is known up front
    typ = pa.list_(pa.int32())
    arr = pa.array([[1], [2, 42]], type=typ)
    py_value = arr.to_pylist()
    arr._export_to_c_device(ptr_array)
    assert pa.total_allocated_bytes() > old_allocated

    # verify exported struct
    assert c_array.device_type == 1  # ARROW_DEVICE_CPU
    assert c_array.device_id == -1
    assert c_array.array.length == 2

    # Delete and recreate C++ object from exported pointer
    del arr
    arr_new = pa.Array._import_from_c_device(ptr_array, typ)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new, typ
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        pa.Array._import_from_c_device(ptr_array, pa.list_(pa.int32()))

    # Type is exported and imported at the same time
    arr = pa.array([[1], [2, 42]], type=pa.list_(pa.int32()))
    py_value = arr.to_pylist()
    arr._export_to_c_device(ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del arr
    arr_new = pa.Array._import_from_c_device(ptr_array, ptr_schema)
    assert arr_new.to_pylist() == py_value
    assert arr_new.type == pa.list_(pa.int32())
    assert pa.total_allocated_bytes() > old_allocated
    del arr_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.Array._import_from_c_device(ptr_array, ptr_schema)


@needs_cffi
def test_export_import_device_batch():
    c_schema = ffi.new("struct ArrowSchema*")
    ptr_schema = int(ffi.cast("uintptr_t", c_schema))
    c_array = ffi.new("struct ArrowDeviceArray*")
    ptr_array = int(ffi.cast("uintptr_t", c_array))

    gc.collect()  # Make sure no Arrow data dangles in a ref cycle
    old_allocated = pa.total_allocated_bytes()

    # Schema is known up front
    batch = make_batch()
    schema = batch.schema
    py_value = batch.to_pydict()
    batch._export_to_c_device(ptr_array)
    assert pa.total_allocated_bytes() > old_allocated

    # verify exported struct
    assert c_array.device_type == 1  # ARROW_DEVICE_CPU
    assert c_array.device_id == -1
    assert c_array.array.length == 2
Member: Could we add a test that uses the Arrow CUDA lib and verifies the device etc.?

@jorisvandenbossche (Member, author), Feb 13, 2024: I was planning to add actual CUDA tests later in a separate PR (with proper roundtrip tests, not just export; roundtrip doesn't work yet for non-CPU right now).


    # Delete and recreate C++ object from exported pointer
    del batch
    batch_new = pa.RecordBatch._import_from_c_device(ptr_array, schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == schema
    assert pa.total_allocated_bytes() > old_allocated
    del batch_new, schema
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_array_released:
        pa.RecordBatch._import_from_c_device(ptr_array, make_schema())

    # Schema is exported and imported at the same time
    batch = make_batch()
    py_value = batch.to_pydict()
    batch._export_to_c_device(ptr_array, ptr_schema)
    # Delete and recreate C++ objects from exported pointers
    del batch
    batch_new = pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)
    assert batch_new.to_pydict() == py_value
    assert batch_new.schema == make_batch().schema
    assert pa.total_allocated_bytes() > old_allocated
    del batch_new
    assert pa.total_allocated_bytes() == old_allocated
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)

    # Not a struct type
    pa.int32()._export_to_c(ptr_schema)
    make_batch()._export_to_c_device(ptr_array)
    with pytest.raises(ValueError,
                       match="ArrowSchema describes non-struct type"):
        pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)
    # Now released
    with assert_schema_released:
        pa.RecordBatch._import_from_c_device(ptr_array, ptr_schema)