In a Python extension module I've written in C++, I use the following snippet of code to convert a NumPy array into an Armadillo array for use in the C++ portion of the code:
static arma::mat convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
// Check if the dimensions are what I expect.
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
const std::vector<int> dims = getPyArrayDimensions(pyarr); // Gets the dimensions using the API
PyArray_Descr* reqDescr = PyArray_DescrFromType(NPY_DOUBLE);
if (reqDescr == NULL) throw std::bad_alloc();
// Convert the array to Fortran-ordering as required by Armadillo
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr,
NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
double* dataPtr = static_cast<double*>(PyArray_DATA(cleanArr));
arma::mat result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
The problem is that when I pass this a view of a NumPy array (i.e. my_array[:, 3]
), it doesn't seem to handle the strides of the underlying C array correctly. Based on the output, it seems like the array pyarr
received by the function is actually the full base array, and not the view (or at least when I access the data using PyArray_DATA
, I seem to be getting a pointer to the full base array). If I instead pass this function a copy of the view (i.e. my_array[:, 3].copy()
), it works as expected, but I don't want to have to remember to do that every time.
So, is there a way to make PyArray_FromArray
copy only the slice of the matrix I want? I tried using the flag NPY_ARRAY_ENSURECOPY
, but that didn't help.
Edit 1
As suggested in the comments, here is a full working example:
In file example.cpp
:
#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION
extern "C" {
#include <Python.h>
#include <numpy/arrayobject.h>
}
#include <exception>
#include <cassert>
#include <string>
#include <type_traits>
#include <map>
#include <vector>
#include <armadillo>
class WrongDimensions : public std::exception
{
public:
WrongDimensions() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The dimensions were incorrect";
};
class NotImplemented : public std::exception
{
public:
NotImplemented() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "Not implemented";
};
class BadArrayLayout : public std::exception
{
public:
BadArrayLayout() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The matrix was not contiguous";
};
static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
{
npy_intp ndims = PyArray_NDIM(pyarr);
npy_intp* dims = PyArray_SHAPE(pyarr);
std::vector<npy_intp> result;
for (int i = 0; i < ndims; i++) {
result.push_back(dims[i]);
}
return result;
}
/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
* care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
*/
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
{
const auto dims = getPyArrayDimensions(pyarr);
assert(dims.size() <= 2 && dims.size() > 0);
if (dims.size() == 1) {
return (dims[0] == dim0 || dim0 == -1) && (dim1 == 1 || dim1 == -1);
}
else {
return (dims[0] == dim0 || dim0 == -1) && (dims[1] == dim1 || dim1 == -1);
}
}
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
const auto dims = getPyArrayDimensions(pyarr);
if (dims.size() == 1) {
outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));
return arma::Col<outT>(dataPtr, dims[0], true);
}
else {
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
arma::Mat<outT> result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
}
static PyObject* convertArmaToPyArray(const arma::mat& matrix)
{
npy_intp ndim = matrix.is_colvec() ? 1 : 2;
npy_intp nRows = static_cast<npy_intp>(matrix.n_rows); // NOTE: This narrows the integer
npy_intp nCols = static_cast<npy_intp>(matrix.n_cols);
npy_intp dims[2] = {nRows, nCols};
PyObject* result = PyArray_SimpleNew(ndim, dims, NPY_DOUBLE);
if (result == NULL) throw std::bad_alloc();
double* resultDataPtr = static_cast<double*>(PyArray_DATA((PyArrayObject*)result));
for (int i = 0; i < nRows; i++) {
for (int j = 0; j < nCols; j++) {
resultDataPtr[i * nCols + j] = matrix(i, j);
}
}
return result;
}
extern "C" {
// An example function that takes a NumPy array and converts it to
// an arma::mat and back. This should return the array unchanged.
static PyObject* example_testFunction(PyObject* self, PyObject* args)
{
PyArrayObject* myArray = NULL;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &myArray)) {
return NULL;
}
PyObject* output = NULL;
try {
arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);
output = convertArmaToPyArray(myMat);
}
catch (const std::bad_alloc&) {
PyErr_NoMemory();
Py_XDECREF(output);
return NULL;
}
catch (const std::exception& err) {
PyErr_SetString(PyExc_RuntimeError, err.what());
Py_XDECREF(output);
return NULL;
}
return output;
}
static PyMethodDef example_methods[] =
{
{"test_function", example_testFunction, METH_VARARGS, "A test function"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef example_module = {
PyModuleDef_HEAD_INIT,
"example", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
example_methods
};
PyMODINIT_FUNC
PyInit_example(void)
{
import_array();
PyObject* m = PyModule_Create(&example_module);
if (m == NULL) return NULL;
return m;
}
}
And setup.py
for compiling:
from setuptools import setup, Extension
import numpy as np
example_module = Extension(
'example',
include_dirs=[np.get_include(), '/usr/local/include'],
libraries=['armadillo'],
library_dirs=['/usr/local/lib'],
sources=['example.cpp'],
language='c++',
extra_compile_args=['-std=c++11', '-mmacosx-version-min=10.10'],
)
setup(name='example',
ext_modules=[example_module],
)
Now assume we have the example array
a = np.array([[ 1, 2, 3, 4, 5, 6],
[ 7, 8, 9,10,11,12],
[13,14,15,16,17,18]], dtype='float64')
The function seems to work fine for multidimensional slices like a[:, :3]
, and it returns the matrix unaltered as I'd expect. But if I give it a single-dimensional slice, I get the wrong components unless I make a copy:
>>> example.test_function(a[:, 3])
array([ 4., 5., 6.])
>>> example.test_function(a[:, 3].copy())
array([ 4., 10., 16.])