Here is a solution that uses Eigen 3.4.0.
If you want extra parallelism, you can also #include <execution>
and pass an execution policy to the STL algorithms.
#include <Eigen/Dense>
#include <algorithm>
#include <functional>
#include <iostream>
// Prints the nonzero coefficients of a small dense matrix using Eigen 3.4
// reshaping, boolean masking and index-based slicing. Coefficients appear in
// the order produced by reshaped() (column-major by default).
int main()
{
    Eigen::MatrixXf const m{{1, 0, 0},
                            {0, 5, 6},
                            {0, 0, 9}};
    auto const size = m.size();

    // create 1D view (a 1 x size row) over the matrix coefficients
    auto const view = m.reshaped().transpose();

    // create boolean markers for nonzeros
    auto const mask = view.array() != 0;

    // create index list 0..size-1 and overwrite the positions of zero
    // coefficients with a sentinel that can never be a valid index.
    // LinSpaced takes an INCLUSIVE upper bound, hence size - 1; passing size
    // only works through integer truncation and yields index 1 when size == 1.
    constexpr int sentinel = -1;
    auto idxs = mask.select(Eigen::RowVectorXi::LinSpaced(size, 0, static_cast<int>(size - 1)),
                            sentinel)
                    .eval();

    // compact the surviving indices to the front (order preserved, O(n)),
    // then drop the tail that std::remove leaves behind
    auto const kept_end = std::remove(idxs.begin(), idxs.end(), sentinel);
    idxs.conservativeResize(kept_end - idxs.begin());

    // gather the nonzero coefficients through the index list
    auto const nonzeros = view(idxs).eval();
    std::cout << nonzeros << '\n';
}
Output:
1 5 6 9