3

(Disclaimer: I am terrible with math and am coming from JavaScript, so I apologize for any inaccuracies and will do my best to correct them.)

The example on Rosetta Code shows how to calculate coefficients using gsl. Here is the code:

polifitgsl.h:

#ifndef _POLIFITGSL_H
#define _POLIFITGSL_H
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>
bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store); /* n, p */
#endif

polifitgsl.cpp:

#include "polifitgsl.h"

bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store) /* n, p */
{
  gsl_multifit_linear_workspace *ws;
  gsl_matrix *cov, *X;
  gsl_vector *y, *c;
  double chisq;

  int i, j;

  X = gsl_matrix_alloc(obs, degree);
  y = gsl_vector_alloc(obs);
  c = gsl_vector_alloc(degree);
  cov = gsl_matrix_alloc(degree, degree);

  for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
      gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
  }

  ws = gsl_multifit_linear_alloc(obs, degree);
  gsl_multifit_linear(X, y, c, cov, &chisq, ws);

  /* store result ... */
  for(i=0; i < degree; i++)
  {
    store[i] = gsl_vector_get(c, i);
  }

  gsl_multifit_linear_free(ws);
  gsl_matrix_free(X);
  gsl_matrix_free(cov);
  gsl_vector_free(y);
  gsl_vector_free(c);
  return true; /* we do not "analyse" the result (cov matrix mainly)
          to know if the fit is "good" */
}

main.cpp (note I've replaced the sample numbers for x any y with my own):

#include <stdio.h>

#include "polifitgsl.h"

#define NP 11
double x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = {98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9};

#define DEGREE 3
double coeff[DEGREE];

int main()
{
  int i;

  polynomialfit(NP, DEGREE, x, y, coeff);
  for(i=0; i < DEGREE; i++) {
    printf("%lf\n", coeff[i]);
  }
  return 0;
}

And here is the output:

98.030909
-0.016182
0.000909

So that gives me the coefficients. But what I really want is the actual fitted points. In JavaScript, I've used the regression package to calculate the points:

var regression = require('regression');

var calculateRegression = function(values, degree) {
    var data = [];
    var regressionOutput;
    var valuesCount = values.length;
    var i = 0;

    // Format the data in a way the regression library expects.
    for (i = 0; i < valuesCount; i++) {
        data[i] = [i, values[i]];
    }

    // Use the library to calculate the regression.
    regressionOutput = regression('polynomial', data, degree);

    return regressionOutput;
};

var y = [98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9];

console.log(calculateRegression(y, 3));

Which produces:

{ equation: 
   [ 98.02987916431594,
     -0.017378390369880512,
     0.0015748071645344357,
     -0.00005721503635571101 ],
  points: 
   [ [ 0, 98.02987916431594 ],
     [ 1, 98.01401836607424 ],
     [ 2, 98.00096389194348 ],
     [ 3, 97.9903724517055 ],
     [ 4, 97.98190075514219 ],
     [ 5, 97.97520551203543 ],
     [ 6, 97.96994343216707 ],
     [ 7, 97.96577122531896 ],
     [ 8, 97.96234560127297 ],
     [ 9, 97.959323269811 ],
     [ 10, 97.95636094071487 ],
     [ 11, 97.95311532376647 ],
     [ 12, 97.94924312874768 ],
     [ 13, 97.94440106544033 ],
     [ 14, 97.93824584362629 ],
     [ 15, 97.93043417308745 ],
     [ 16, 97.92062276360569 ],
     [ 17, 97.90846832496283 ],
     [ 18, 97.89362756694074 ],
     [ 19, 97.87575719932133 ] ],
  string: 'y = 0x^3 + 0x^2 + -0.02x + 98.03' }

(Note there are floating point issues in JavaScript, so the numbers aren't perfectly exact.)

points here is what I'm wanting to generate using gsl. Is there a way I can do this?

Chad Johnson
  • 21,215
  • 34
  • 109
  • 207
  • 1
    There is no real need for `gsl` to generate points. if your equation above is `y = x^3 + x^2 - 0.2x + 98.03`, then for any `x`, you compute a corresponding value `y` that make up your `(x, y)` points.You need nothing but a simple `double y; for (x = 0; x < 20; x++) y = x*x*x + x*x -.02*x + 98.03;` to generate the points you list above. No `gsl` implications at all. – David C. Rankin Apr 09 '16 at 22:51
  • Of course -- makes perfect sense. How did I not realize this :) Thank you! – Chad Johnson Apr 09 '16 at 23:26
  • Hm, actually, I'm still a bit lost. When I run the gsl example, this is all I get as output: 98.030909, -0.016182, and 0.000909. How exactly do I build an equation from this? – Chad Johnson Apr 09 '16 at 23:42
  • Chad, if you are talking about the points in the in the vector and covariance matrix used in the curve fit, you can use `gsl_vector_get` and `gsl_matrix_get` (see: [**gsl multifit example**](https://www.gnu.org/software/gsl/manual/html_node/Fitting-Examples.html) ) Otherwise, if you are just talking about the points at any point in your range of `x` values, then just use the polynomial equation with the coefficients found. What you show in the example above seems to be the points used in the curve fit. – David C. Rankin Apr 10 '16 at 01:56

2 Answers2

2

Chad, if I understand what you are needing, you simply need to compute the values based on the polynomial equation using the coefficients you found with your polynomialfit function. After you compute the coefficients, you can find the value y for any x (for DEGREE = 3) with the equation:

y = x^2*(coeff2) + x*(coeff1) + coeff0

or in C-syntax

y = x*x*coeff[2] + x*coeff[1] + coeff[0];

You can modify your main.cpp as follows (you should rename both main.cpp to main.c and polyfitgsl.cpp to polyfitgsl.c -- as they are both C-source files, not C++)

#include <stdio.h>

#include "polifitgsl.h"

#define NP 20
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
               10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
               97.97, 97.96, 97.94, 97.96, 97.96,
               97.97, 97.97, 97.94, 97.94, 97.94,
               97.92, 97.96, 97.9,  97.85, 97.9 };

#define DEGREE 3
double coeff[DEGREE];

int main (void)
{
    int i;

    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}

Which provides the following output:

Example Use/Output

$ ./bin/main

 polynomial coefficients:

  coeff[0] :  98.0132468
  coeff[1] :  -0.0053003
  coeff[2] :  -0.0000558

 computed values:

   x    y
   0,  98.0132468
   1,  98.0078906
   2,  98.0024229
   3,  97.9968435
   4,  97.9911524
   5,  97.9853497
   6,  97.9794354
   7,  97.9734094
   8,  97.9672718
   9,  97.9610226
  10,  97.9546617
  11,  97.9481891
  12,  97.9416049
  13,  97.9349091
  14,  97.9281016
  15,  97.9211825
  16,  97.9141517
  17,  97.9070093
  18,  97.8997553
  19,  97.8923896

That seems like what you are asking for. Look it over, compare your values and let me know if you need additional help.


Cleaning Up A Bit Further

Just to clean the code up a bit further, since there is no need for global declaration of x, y, or coeff, and using an enum to define the constants DEGREE and NP, a tidier version would be:

#include <stdio.h>

#include "polifitgsl.h"

enum { DEGREE = 3, NP = 20 };

int main (void)
{
    int i;
    double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
                   10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
    double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
                   97.97, 97.96, 97.94, 97.96, 97.96,
                   97.97, 97.97, 97.94, 97.94, 97.94,
                   97.92, 97.96, 97.9,  97.85, 97.9 };
    double coeff[DEGREE] = {0};


    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++)
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}
Chad Johnson
  • 21,215
  • 34
  • 109
  • 207
David C. Rankin
  • 81,885
  • 6
  • 58
  • 85
  • Thank you so much David for going out of your way -- it's really appreciated. The JS regression library numbers were way off! Wow. – Chad Johnson Apr 10 '16 at 04:36
  • I just realized that actually the JavaScript library numbers were off, but they were not that far off. I needed to adjust `NP` in the C program to be 20 instead of 11. I'm going to go ahead and edit your answer to reflect that -- hope you don't mind my fixing my own mistake :) – Chad Johnson Apr 10 '16 at 05:26
  • Glad I could help. If you enjoy programming, you will soon find C will become a favorite. Yes it takes a little extra effort to learn, since it requires an exact understanding of the of just what memory you are using and how you are accessing it. But with that low-level access comes unparallelled coding flexibility and power to have your code do just exactly what you need it to do. There is no better compiled language to learn. If you master C, mastering the rest is a piece of cake. – David C. Rankin Apr 10 '16 at 14:55
  • C certainly has its uses. However, everything being a global function makes it difficult for me to understand the structure of the app. I'm much more into object-oriented languages like C++, Java, and even JavaScript. I worked on a PHP code base several years ago, and all functions were global, so it was very messy and difficult to understand. – Chad Johnson Apr 10 '16 at 17:36
  • 1
    After plowing though how object oriented Desktops (e.g. Gnome, etc.) are written in C, you come to realize the global `main` is really just an entry point for control of the application. C++ uses the same `main`. It is what you turn control over to from `main` where the differences disappear. I see it more as choosing the appropriate tool for the job. C isn't intended to be a, .js, php, etc. It can be used in an object-oriented or procedural manner. That is its flexibility. Where that is most apparent is in the overhead and load-times. You see 400%+ benefit over java and C++ routinely. – David C. Rankin Apr 10 '16 at 18:52
2

I had the same problem and have taken the answers above and added a solution for direct calculation (without gsl) taken from http://www.cplusplus.com/forum/general/181580/

Below, you find a standalone test program with a gsl-based and a direct calculation solution.

I have done some profiling runs and the performance of direct calculation is an impressive 65 times higher on my system, 5.22s vs. 0.08s for 1000 calls to the respective functions.

As a side note, the direct calculation uses Cramer's rule, so you should be careful with ill conditioned data. Normally I would avoid Cramer's but using a full linear system solver for a 3x3 system seems overkill to me.

/*
* =====================================================================================
*
*       Filename:  polyfit.cpp
*
*    Description:  Least squares fit of second order polynomials 
*                  Test program using gsl and direct calculation
*        Version:  1.0
*        Created:  2017-07-17 09:32:55
*       Compiler:  gcc
*
*         Author:  Bernhard Brunner, brb_blog@epr.ch
*     References:  This code was merged, adapted and optimized from these two sources:
*                  http://www.cplusplus.com/forum/general/181580/
*                  https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points
*                  http://brb.epr.ch/blog/blog:least_squares_regression_of_parabola
*          Build:  compile and link using 
*                  g++    -c -o polifitgsl.o polifitgsl.cpp
*                  gcc  polifitgsl.o -lgsl -lm -lblas -o polifitgsl
* 
*      Profiling:
*                  valgrind --tool=callgrind ./polifitgsl
*                  kcachegrind
*                  
*                  polynomialfit        takes 5.22s for 1000 calls
*                  findQuadCoefficients takes 0.08s for 1000 calls
*                   65x faster
* =====================================================================================
*/

#include <stdio.h>
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>

bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store); /* n, p */
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
            97.97, 97.96, 97.94, 97.96, 97.96,
            97.97, 97.97, 97.94, 97.94, 97.94,
            97.92, 97.96, 97.9,  97.85, 97.9 };

#define NP (sizeof(x)/sizeof(double)) // 20

#define DEGREE 3
double coeff[DEGREE];

bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
    const double S00=PointsNum;//points number
    double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0;
//    const double MINvalue = valueArray[0];
//    const double MINtime = timeArray[0];
    for (int i=0; i<PointsNum; i++ ){
        double value = valueArray[i]; //  - MINvalue); //normalizing
//      cout << "i=" << i << " index=" << index << " value=" << value << endl;

        int index = timeArray[i]; //  - MINtime;
        int index2 = index * index;
        int index3 = index2 * index;
        int index4 = index3 * index;

        S40+= index4;
        S30+= index3; 
        S20+= index2;
        S10+= index;

        S01 += value;
        S11 += value*index;
        S21 += value*index2;
    }

    double S20squared = S20*S20;

    //minors M_ij=M_ji
    double M11 = S20*S00 - S10*S10;
    double M21 = S30*S00 - S20*S10;
    double M22 = S40*S00 - S20squared;
    double M31 = S30*S10 - S20squared;

    double M32 = S40*S10 - S20*S30;
//  double M33 = S40*S20 - pow(S30,2);

    double discriminant = S40*M11 - S30*M21 + S20*M31;
//    printf("discriminant :%lf\n", discriminant);
    if (abs(discriminant) < .00000000001) return  false;

    double Da = S21*M11
               -S11*M21
               +S01*M31;
    coef[2] = Da/discriminant;
//  cout << "discriminant=" << discriminant;
//  cout << " Da=" << Da;

    double Db = -S21*M21
                +S11*M22
                -S01*M32;
    coef[1] = Db/discriminant;
//  cout << " Db=" << Db << endl;

    double Dc =   S40*(S20*S01 - S10*S11) 
                - S30*(S30*S01 - S10*S21) 
                + S20*(S30*S11 - S20*S21);
    coef[0] = Dc/discriminant;
//    printf("c=%lf\n", c);

    critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da);

    return true;
}

bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store) /* n, p */
{
gsl_multifit_linear_workspace *ws;
gsl_matrix *cov, *X;
gsl_vector *y, *c;
double chisq;

int i, j;

X = gsl_matrix_alloc(obs, degree);
y = gsl_vector_alloc(obs);
c = gsl_vector_alloc(degree);
cov = gsl_matrix_alloc(degree, degree);

for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
        gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
}

ws = gsl_multifit_linear_alloc(obs, degree);
gsl_multifit_linear(X, y, c, cov, &chisq, ws);

/* store result ... */
for(i=0; i < degree; i++) {
    store[i] = gsl_vector_get(c, i);
}

gsl_multifit_linear_free(ws);
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(y);
gsl_vector_free(c);
return true; /* we do not "analyse" the result (cov matrix mainly)
        to know if the fit is "good" */
}

void testcoeff(double *coeff)
{
    printf ("\n polynomial coefficients\n");
    for (int i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');

    printf (" computed values:\n\n   x, yi, yip\n");
    for (unsigned i = 0; i < NP; i++) {
        printf ("%2u,%.7lf,%.7lf\n", i, 
                y[i], 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    }
    putchar ('\n');
}

int main (void)
{
    #define ITER 1000
    for (int i=0; i< ITER; i++) {
        polynomialfit (NP, DEGREE, x, y, coeff);
    }
    testcoeff(coeff);

    double sx; 
    for (int i=0; i< ITER; i++) {
        findQuadCoefficients(x, y, coeff, sx, NP);
    }
    printf("critical point %lf\n", sx);
    testcoeff(coeff);
    return 0;
}

References:

Floyd
  • 739
  • 1
  • 7
  • 20