-1

I am trying to implement a GSL Nonlinear least-squares algorithm for curve fitting in R using Rcpp. This question is close to a previous question I asked here: Fixing parameters of a fitting function in Nonlinear Least-Square GSL

My attempt to implement a GSL-based Nonlinear least-squares algorithm has been successful if my objective is to estimate all the parameter of a given function, that is used to fit the data. The problem comes when I try to follow @zkoza suggestion in Fixing parameters of a fitting function in Nonlinear Least-Square GSL for fixing some of the parameters of the function.

When I sourceCpp my code, adapted following my previous question I do get the following error:

Error in dyn.load("/private/var/folders/pq/hxwd9my563q_qpy4rbrlgkmw0000gn/T/RtmpRKdn9f/sourceCpp-x86_64-apple-darwin17.0-1.0.8.3/sourcecpp_a60fe49985e/sourceCpp_80.so") : 
  unable to load shared object '/private/var/folders/pq/hxwd9my563q_qpy4rbrlgkmw0000gn/T/RtmpRKdn9f/sourceCpp-x86_64-apple-darwin17.0-1.0.8.3/sourcecpp_a60fe49985e/sourceCpp_80.so':
  dlopen(/private/var/folders/pq/hxwd9my563q_qpy4rbrlgkmw0000gn/T/RtmpRKdn9f/sourceCpp-x86_64-apple-darwin17.0-1.0.8.3/sourcecpp_a60fe49985e/sourceCpp_80.so, 0x0006): symbol not found in flat namespace '__Z28internal_make_gsl_vector_ptrILm3EEP10gsl_vectorRKNSt3__15arrayIdXT_EEE' 

This is the C++ wrapper code for performing the nonlinear least-squares data fitting:

// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::depends(RcppNumerical)]]
// [[Rcpp::depends(RcppGSL)]]
// [[Rcpp::depends(BH)]]
#define EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#include <RcppNumerical.h>
#include <RcppGSL.h>
#include <array>
#include <Rcpp.h>
#include <iostream>
#include <vector>
#include <cassert>
#include <functional>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_multifit_nlinear.h>

using namespace std;
using namespace Rcpp;
using namespace Numer;

template <class R, class... ARGS>
struct function_ripper {
    static constexpr size_t n_args = sizeof...(ARGS);
};

template <class R, class... ARGS>
auto constexpr n_params(R (ARGS...) )
{
    return function_ripper<R, ARGS...>();
}

template <typename F, size_t... Is>
auto gen_tuple_impl(F func, std::index_sequence<Is...> )
{
    return std::make_tuple(func(Is)...);
}

template <size_t N, typename F>
auto gen_tuple(F func)
{
    return gen_tuple_impl(func, std::make_index_sequence<N>{} );
}

auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
                           gsl_multifit_nlinear_parameters *params) -> std::vector<double>
{
    // This specifies a trust region method
    const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust;
    const size_t max_iter = 200;
    const double xtol = 1.0e-8;
    const double gtol = 1.0e-8;
    const double ftol = 1.0e-8;

    auto *work = gsl_multifit_nlinear_alloc(T, params, fdf->n, fdf->p);
    int info;

    // initialize solver
    gsl_multifit_nlinear_init(initial_params, fdf, work);
    //iterate until convergence
    gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, nullptr, nullptr, &info, work);

    // result will be stored here
    gsl_vector * y    = gsl_multifit_nlinear_position(work);
    auto result = std::vector<double>(initial_params->size);

    for(int i = 0; i < result.size(); i++)
    {
        result[i] = gsl_vector_get(y, i);
    }

    auto niter = gsl_multifit_nlinear_niter(work);
    auto nfev  = fdf->nevalf;
    auto njev  = fdf->nevaldf;
    auto naev  = fdf->nevalfvv;
    gsl_multifit_nlinear_free(work);
    gsl_vector_free(initial_params);
    return result;
}

auto internal_make_gsl_vector_ptr(const std::vector<double>& vec) -> gsl_vector*
{
    auto* result = gsl_vector_alloc(vec.size());
    int i = 0;
    for(const auto e: vec)
    {
        gsl_vector_set(result, i, e);
        i++;
    }
    return result;
}

template<typename C1>
struct fit_data
{
    const std::vector<double>& t;
    const std::vector<double>& y;
    // the actual function to be fitted
    C1 f;
};

template<typename FitData, int n_params>
int internal_f(const gsl_vector* x, void* params, gsl_vector *f)
{
    auto* d  = static_cast<FitData*>(params);
    // Convert the parameter values from gsl_vector (in x) into std::tuple
    auto init_args = [x](int index)
    {
        return gsl_vector_get(x, index);
    };
    auto parameters = gen_tuple<n_params>(init_args);

    // Calculate the error for each...
    for (size_t i = 0; i < d->t.size(); ++i)
    {
        double ti = d->t[i];
        double yi = d->y[i];
        auto func = [ti, &d](auto ...xs)
        {
            // call the actual function to be fitted
            return d->f(ti, xs...);
        };
        auto y = std::apply(func, parameters);
        gsl_vector_set(f, i, yi - y);
    }
    return GSL_SUCCESS;
}

using func_f_type   = int (*) (const gsl_vector*, void*, gsl_vector*);
using func_df_type  = int (*) (const gsl_vector*, void*, gsl_matrix*);
using func_fvv_type = int (*) (const gsl_vector*, const gsl_vector *, void *, gsl_vector *);

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*;


auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>;

template<typename C1>
auto curve_fit_impl(func_f_type f, func_df_type df, func_fvv_type fvv, gsl_vector* initial_params, fit_data<C1>& fd) -> std::vector<double>
{
    assert(fd.t.size() == fd.y.size());

    auto fdf = gsl_multifit_nlinear_fdf();
    auto fdf_params = gsl_multifit_nlinear_default_parameters();

    fdf.f   = f;
    fdf.df  = df;
    fdf.fvv = fvv;
    fdf.n   = fd.t.size();
    fdf.p   = initial_params->size;
    fdf.params = &fd;

    // "This selects the Levenberg-Marquardt algorithm with geodesic acceleration."
    fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
    return internal_solve_system(initial_params, &fdf, &fdf_params);
}

template <typename Callable, auto n>
auto curve_fit(Callable f, const std::array<double, n>& initial_params, const std::vector<double>& x, const std::vector<double>& y) -> std::vector<double>
{
    auto params = internal_make_gsl_vector_ptr(initial_params);
    auto fd = fit_data<Callable>{x, y, f};
    return curve_fit_impl(internal_f<decltype(fd), n>, nullptr, nullptr, params,  fd);
}

And these are the functions I used to fit the data:

// [[Rcpp::export]]
double gaussian(double x, double a, double b, double c){
    const double z = (x - b) / c;
    return a * std::exp(-0.5 * z * z);
}

struct gaussian_fixed_a{
    double a;
    gaussian_fixed_a(double a) : a{a} {}
    double operator()(double x, double b, double c) const { return gaussian(x, a, b, c); }
};

// [[Rcpp::export]]
Rcpp::List fittingTest(const std::vector<double> xs,const std::vector<double> ys, const double a){

      gaussian_fixed_a g(a);
      auto r = curve_fit(g, std::array{0.444, 0.11}, xs, ys);
      return Rcpp::List::create(Rcpp::Named("b") = r[0],
                                  Rcpp::Named("c") = r[1]);
        }

Any idea where my code is creating the linking problem?

CafféSospeso
  • 1,101
  • 3
  • 11
  • 28
  • 1
    "Any idea where my code is creating the linking problem?" - The error suggests it's somewhere in the `gsl` libraries. A good place to start would be to make a *minimal* example that only calls a function from ``. Then build up your code from there to narrow down the issue. – SymbolixAU Jul 05 '22 at 22:39
  • 1
    I followed your suggestion and updated my question. However, the code depending on gsl_vector.h compiles and works well. What do you think? – CafféSospeso Jul 06 '22 at 11:31
  • 1
    Add another function, then another, and keep adding functions and libraries until you get the error again. Then you'll know the cause and can narrow it down. – SymbolixAU Jul 06 '22 at 23:08
  • 1
    Precisely. Start from a minimal working package that use Rcpp and GSL, possibly via RcppGSL. My very small RcppZiggurat is an example. Then build up from there. Debugging is often done 'from both sides': grow from a minimal working base, and reduce down from a borked non-working state. Until you meet in the middle and have your stuff working. You need to abstract away the distractions that are orthogonal to your point at hand, yet your questions are (repeatedly) full of these distractions. Let to reduce questions to their essence. – Dirk Eddelbuettel Jul 08 '22 at 14:29
  • 1
    SO, I just solved the problem. I will answer to my own question providing the full code of such implementation. Others may find it useful or could improve it, perhaps. – CafféSospeso Jul 08 '22 at 15:29
  • Excellent; please do post your solution as an answer. – SymbolixAU Jul 09 '22 at 00:20
  • Strong second for what @SymbolixAU asks here. Your pattern of _repeatedly_ asking questions, sometimes quickfire, you then leave dangling just reduces the motivation of others to help you. Not in your longer-term interest I surmise. – Dirk Eddelbuettel Jul 12 '22 at 11:52
  • Yes, sorry..I was occupied with other related issues. But you are right. I'm going to implement my answer to this post now. Thanks for pointing it out. – CafféSospeso Jul 12 '22 at 15:45

1 Answers1

1

The error was solved by following @zkoza answer in , that is specifying a compile-time array where the number of parameters is automatically deduced from the length of the array. In Line 81 - 92:

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*
{
    auto* result = gsl_vector_alloc(vec.size());
    int i = 0;
    for(const auto e: vec)
    {
        gsl_vector_set(result, i, e);
        i++;
    }
    return result;
}

The full code now looks like:

#include <RcppGSL.h>
#include <array>
#include <Rcpp.h>
#include <cmath>
#include <iostream>
#include <cstdlib>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <vector>
#include <cassert>
#include <functional>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_multifit_nlinear.h>

using namespace std;
using namespace Rcpp;

// [[Rcpp::depends(RcppGSL)]]

template <typename F, size_t... Is>
auto gen_tuple_impl(F func, std::index_sequence<Is...> )
{
    return std::make_tuple(func(Is)...);
}

template <size_t N, typename F>
auto gen_tuple(F func)
{
    return gen_tuple_impl(func, std::make_index_sequence<N>{} );
}

template <class R, class... ARGS>
struct function_ripper {
    static constexpr size_t n_args = sizeof...(ARGS);
};

template <class R, class... ARGS>
auto constexpr n_params(R (ARGS...) )
{
    return function_ripper<R, ARGS...>();
}

auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>
{
  // This specifies a trust region method
  const gsl_multifit_nlinear_type *T = gsl_multifit_nlinear_trust;
  const size_t max_iter = 200;
  const double xtol = 1.0e-8;
  const double gtol = 1.0e-8;
  const double ftol = 1.0e-8;

  auto *work = gsl_multifit_nlinear_alloc(T, params, fdf->n, fdf->p);
  int info;

  // initialize solver
  gsl_multifit_nlinear_init(initial_params, fdf, work);
  //iterate until convergence
  gsl_multifit_nlinear_driver(max_iter, xtol, gtol, ftol, nullptr, nullptr, &info, work);

  // result will be stored here
  gsl_vector * y    = gsl_multifit_nlinear_position(work);
  auto result = std::vector<double>(initial_params->size);

  for(int i = 0; i < result.size(); i++)
  {
    result[i] = gsl_vector_get(y, i);
  }

  auto niter = gsl_multifit_nlinear_niter(work);
  auto nfev  = fdf->nevalf;
  auto njev  = fdf->nevaldf;
  auto naev  = fdf->nevalfvv;

  gsl_multifit_nlinear_free(work);
  gsl_vector_free(initial_params);
  return result;
}

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*
{
    auto* result = gsl_vector_alloc(vec.size());
    int i = 0;
    for(const auto e: vec)
    {
        gsl_vector_set(result, i, e);
        i++;
    }
    return result;
}

template<typename C1>
struct fit_data
{
    const std::vector<double>& t;
    const std::vector<double>& y;
    // the actual function to be fitted
    C1 f;
};


template<typename FitData, int n_params>
int internal_f(const gsl_vector* x, void* params, gsl_vector *f)
{
    auto* d  = static_cast<FitData*>(params);
    // Convert the parameter values from gsl_vector (in x) into std::tuple
    auto init_args = [x](int index)
    {
        return gsl_vector_get(x, index);
    };
    auto parameters = gen_tuple<n_params>(init_args);

    // Calculate the error for each...
    for (size_t i = 0; i < d->t.size(); ++i)
    {
        double ti = d->t[i];
        double yi = d->y[i];
        auto func = [ti, &d](auto ...xs)
        {
            // call the actual function to be fitted
            return d->f(ti, xs...);
        };
        auto y = std::apply(func, parameters);
        gsl_vector_set(f, i, yi - y);
    }
    return GSL_SUCCESS;
}

using func_f_type   = int (*) (const gsl_vector*, void*, gsl_vector*);
using func_df_type  = int (*) (const gsl_vector*, void*, gsl_matrix*);
using func_fvv_type = int (*) (const gsl_vector*, const gsl_vector *, void *, gsl_vector *);

template<auto n>
auto internal_make_gsl_vector_ptr(const std::array<double, n>& vec) -> gsl_vector*;

auto internal_solve_system(gsl_vector* initial_params, gsl_multifit_nlinear_fdf *fdf,
             gsl_multifit_nlinear_parameters *params) -> std::vector<double>;

template<typename C1>
auto curve_fit_impl(func_f_type f, func_df_type df, func_fvv_type fvv, gsl_vector* initial_params, fit_data<C1>& fd) -> std::vector<double>
{
    assert(fd.t.size() == fd.y.size());

    auto fdf = gsl_multifit_nlinear_fdf();
    auto fdf_params = gsl_multifit_nlinear_default_parameters();

    fdf.f   = f;
    fdf.df  = df;
    fdf.fvv = fvv;
    fdf.n   = fd.t.size();
    fdf.p   = initial_params->size;
    fdf.params = &fd;

    // "This selects the Levenberg-Marquardt algorithm with geodesic acceleration."
    fdf_params.trs = gsl_multifit_nlinear_trs_lmaccel;
    return internal_solve_system(initial_params, &fdf, &fdf_params);
}

template <typename Callable, auto n>
auto curve_fit(Callable f, const std::array<double, n>& initial_params, const std::vector<double>& x, const std::vector<double>& y) -> std::vector<double>
{
    // We can't pass lambdas without convert to std::function.
    //constexpr auto n = 3;//decltype(n_params(f))::n_args - 5;
    //constexpr auto n = 2;
    assert(initial_params.size() == n);

    auto params = internal_make_gsl_vector_ptr(initial_params);
    auto fd = fit_data<Callable>{x, y, f};
    return curve_fit_impl(internal_f<decltype(fd), n>, nullptr, nullptr, params,  fd);
}

In the same *.cpp file is also included the gaussian function, the gaussian functor gaussian_fixed_a and the fittingTest function, as detailed in the question.

In R, I would test the fittingTest function in the following way:

Sys.setenv("PKG_CXXFLAGS"="-std=c++17")

require(Rcpp)
require(RcppGSL)

sourceCpp('./testingCode.cpp')

x_list = seq(2,14,1)

###START: simulate Data under gaussian function###
res = data.frame(geo=x_list,val=NA)

for(i in 1:length(x_list)){
  res$val[i]= gaussian(x_list[i], a=8, b=0.4, c=5)
}
###END: simulate Data under gaussian function###

fittingTest(xs=res$geo,ys=res$val,a=8)

And the result I get for this example, while fixing a=8, are:

$b
[1] 0.4

$c
[1] 5

For a visual inspection of the result, you can see that the simulated and fitted data overlap perfectly (note that in this R example I did not add any 'noise' to the simulated data.

enter image description here

CafféSospeso
  • 1,101
  • 3
  • 11
  • 28