So after reading the [ https://arrow.apache.org/docs/python/integration/extending.html ](Apache Python Extending Documentation), Arrow CPP no longer includes PyArrow header files and those header files are included in the PyArrow. I am able to identify the locations of the pyarrow files which is at ${CONDA_PRFIX}/lib/python3.9/site-packages/pyarrow/include.
In my CMakeFiles.txt, I added
set(PYARROW_INCLUDE_DIR "${Python_SITELIB}/pyarrow/include")
set(PYARROW_INCLUDE_LOCAL_DIR "<HARDCODED_PATH>/lib/python3.9/site-packages/pyarrow/include")
IF(EXISTS ${PYARROW_INCLUDE_DIR})
message(STATUS "Found and using pyarrow include dir: ${PYARROW_INCLUDE_DIR}")
include_directories(${PYARROW_INCLUDE_DIR})
ELSEIF(EXISTS ${PYARROW_INCLUDE_LOCAL_DIR})
message(STATUS "Found and using local development pyarrow include dir: ${PYARROW_INCLUDE_LOCAL_DIR}")
include_directories(${PYARROW_INCLUDE_LOCAL_DIR})
# target_include_directories(${PROJECT_NAME} PRIVATE ${PYARROW_INCLUDE_LOCAL_DIR})
ELSE()
message(FATAL_ERROR "pyarrow include dir not found")
ENDIF()
...
pybind11_add_module(${PROJECT_NAME} pyluban_cpp_ext.cpp ${SRCS_CODE})
target_link_libraries(${PROJECT_NAME} PRIVATE pybind11::module Arrow::arrow_shared GTest::gtest_main ${Boost_LIBRARIES})
target_include_directories(${PROJECT_NAME} PRIVATE ${PYARROW_INCLUDE_LOCAL_DIR})
The purpose of this is to enable custom casting between Python and C++ for PyArrow Tables:
#include <arrow/python/pyarrow.h>
#include <Python.h>
#include <utility>
#include <memory>
#include <arrow/table.h>
namespace py = pybind11;
namespace PYBIND11_NAMESPACE { namespace detail {
template <> struct type_caster<std::shared_ptr<arrow::Table>> {
public:
PYBIND11_TYPE_CASTER(std::shared_ptr<arrow::Table>, _("pyarrow::Table"));
bool load(handle src, bool) {
PyObject *source = src.ptr();
if (!arrow::py::is_table(source))
return false;
arrow::Result<std::shared_ptr<arrow::Table>> result = arrow::py::unwrap_table(source);
if(!result.ok())
return false;
value = result.ValueOrDie();
return true;
}
static handle cast(std::shared_ptr<arrow::Table> src, return_value_policy /* policy */, handle /* parent */) {
return arrow::py::wrap_table(src);
}
};
}} // namespace PYBIND11_NAMESPACE::detail
I was able to build the pybind11 module with CMake
rm -rf build
cmake -S . -B build
cmake --build build -j 2
cmake --install build --prefix $CONDA_PREFIX
But when I tried importing the Python module, I had this error:
Traceback (most recent call last):
File "<string>", line 1, in <module>
ImportError: dlopen(/project_name/build/pyluban_cpp_ext.cpython-39-darwin.so, 0x0002): symbol not found in flat namespace (__ZN5arrow2py10wrap_tableERKNSt3__110shared_ptrINS_5TableEEE)
Traceback (most recent call last):
File "/project_name/build/test_script.py", line 1, in <module>
from project_name import RollingTableManager
ImportError: dlopen(/project_name/build/project_name.cpython-39-darwin.so, 0x0002): symbol not found in flat namespace (__ZN5arrow2py10wrap_tableERKNSt3__110shared_ptrINS_5TableEEE)
which is clear that I had linking issues for the header files in pyarrow. This is the results of tree -L 2
at "<HARDCODED_PATH>/lib/python3.9/site-packages/pyarrow/include":
.
└── arrow
├── acero
├── adapters
├── api.h
├── array
├── array.h
├── buffer.h
├── buffer_builder.h
├── builder.h
├── c
├── chunk_resolver.h
├── chunked_array.h
├── compare.h
├── compute
├── config.h
├── csv
├── dataset
├── datum.h
├── device.h
├── engine
├── extension
├── extension_type.h
├── filesystem
├── flight
├── io
├── ipc
├── json
├── memory_pool.h
├── memory_pool_test.h
├── pch.h
├── pretty_print.h
├── python
├── record_batch.h
├── result.h
├── scalar.h
├── sparse_tensor.h
├── status.h
├── stl.h
├── stl_allocator.h
├── stl_iterator.h
├── table.h
├── table_builder.h
├── tensor
├── tensor.h
├── testing
├── type.h
├── type_fwd.h
├── type_traits.h
├── util
├── vendored
├── visit_array_inline.h
├── visit_data_inline.h
├── visit_scalar_inline.h
├── visit_type_inline.h
├── visitor.h
└── visitor_generate.h
Does anyone have any idea what did I do wrong in the CMakeFiles part?
I tried to link pyarrow header files into my C++ arrow pybind11 project for creating a python module. However I am unable to link up the header files successfully and I am asking for advices here on what went wrong.