I am trying to build a Python module in C++ using pybind11. I have the following code:
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/numpy.h>
namespace py = pybind11;
/// One record handed to Container::addElement: an 8-bit integer plus two
/// double-precision values.
struct ContainerElement
{
    uint8_t i;  ///< Appended to the container's integer column.
    double d;   ///< Appended to the container's first double column.
    double d2;  ///< Appended to the container's second double column.
};
/// Column-oriented storage for ContainerElement records: each field of an
/// element is kept in its own vector.
class Container
{
public:
    /// Mutable access to the stored `i` values.
    std::vector<uint8_t>& getInts() { return ints; }

    /// Mutable access to the stored `d` values.
    std::vector<double>& getDoubles() { return doubles; }

    /// Mutable access to the stored `d2` values.
    std::vector<double>& getDoubles2() { return doubles2; }

    /// Appends one value to each of the three columns, taken from the
    /// matching field of `element`.
    void addElement(ContainerElement element)
    {
        ints.push_back(element.i);
        doubles.push_back(element.d);
        doubles2.push_back(element.d2);
    }

private:
    std::vector<uint8_t> ints;
    std::vector<double> doubles;
    std::vector<double> doubles2;
};
/// Appends 1,000,000 elements to `container`.
///
/// Element k (k = 0 .. 999,999) carries k truncated to 8 bits in its integer
/// field and k converted to double in both floating-point fields.
///
/// @param container  Container that receives the new elements; anything it
///                   already holds is kept.
void fillContainer(Container& container)
{
    // Integral bound, so the loop condition is an integer comparison rather
    // than an int-versus-double one.
    constexpr unsigned kElementCount = 1000000u;

    // Grow every column once up front: a million appends would otherwise
    // trigger repeated reallocations and leave the capacity well above the size.
    container.getInts().reserve(container.getInts().size() + kElementCount);
    container.getDoubles().reserve(container.getDoubles().size() + kElementCount);
    container.getDoubles2().reserve(container.getDoubles2().size() + kElementCount);

    for (unsigned k = 0; k < kElementCount; ++k)
    {
        const double value = static_cast<double>(k);
        // The narrowing to uint8_t is intentional: the integer field wraps modulo 256.
        container.addElement({static_cast<uint8_t>(k), value, value});
    }
}
PYBIND11_MODULE(containerInterface, m) {
py::class_<Container>(m, "Container")
.def(py::init<>())
.def("getInts", [](Container& container)
{
return py::array_t<uint8_t>(
{ container.getInts().size() },
{ sizeof(uint8_t) },
container.getInts().data());
})
.def("getDoubles", [](Container& container)
{
return py::array_t<double>(
{ container.getDoubles().size() },
{ sizeof(double) },
container.getDoubles().data());
})
.def("getDoubles2", [](Container& container)
{
return py::array_t<double>(
{ container.getDoubles2().size() },
{ sizeof(double) },
container.getDoubles2().data());
});
m.def("fillContainer", &fillContainer);
}
I call this code from Python like this:
import containerInterface

# Create an empty Container and let the C++ helper populate it.
container = containerInterface.Container()
containerInterface.fillContainer(container)

# Fetch the three columns through their bound getters, in the same order as before.
i, d, d2 = container.getInts(), container.getDoubles(), container.getDoubles2()
This works. However, when I check the memory usage of the program (using `psutil.Process(os.getpid()).memory_info().rss`), it seems to make a copy when I call the functions `getInts`, `getDoubles` and `getDoubles2`. Is there a way to avoid this?
I have tried using `np.array(container.getInts(), copy=False)`, but it still makes a copy. I also tried using `py::buffer_protocol()` on the Container class, as described here: https://pybind11.readthedocs.io/en/stable/advanced/pycpp/numpy.html . However, I can only make that work for one vector at a time (either the ints vector or one of the doubles vectors), not for all of them at the same time:
/// Buffer-protocol variant of the bindings: the Container object itself is
/// exposed as a 1-D buffer over its integer vector.
PYBIND11_MODULE(containerInterface, m) {
    py::class_<Container>(m, "Container", py::buffer_protocol())
        .def(py::init<>())
        .def("getInts", &Container::getInts)
        .def("getDoubles", &Container::getDoubles)
        // A class has a single buffer definition, so only one of the
        // vectors can be exposed this way.
        .def_buffer([](Container& container) -> py::buffer_info {
            return py::buffer_info(
                container.getInts().data(),                // pointer to the first element
                sizeof(uint8_t),                           // size of one element in bytes
                py::format_descriptor<uint8_t>::format(),  // element format descriptor
                1,                                         // number of dimensions
                { container.getInts().size() },            // shape: element count
                // Stride is the byte distance between consecutive elements.
                // Multiplying by the element count would make every index
                // after the first address memory outside the vector.
                { sizeof(uint8_t) }
            );
        });
    m.def("fillContainer", &fillContainer);
}
Then I can use `i = np.array(container, copy=False)` without a copy being made. However, as I said, it only works for the ints vector now.