vector of struct in linear regression

Question

I'm new to C++ programming and I'm trying to code a simple linear regression program that returns the parameters of the affine function y=ax+b.

My concern is that I can't "use" the vector containing the x and y coordinates of my p points representing my point cloud in my "fit" and "mse" functions. Thank you in advance for your help.

Greetings

Frederick

#include <iostream>
#include <iomanip>
#include <cmath>
#include <vector>

using namespace std;

struct Point {
    double x;
    double y;
};

struct line {
    double a;
    double b;
};

const vector<Point> points({ { 1.2, 2.3 },
                             { 2.4, 2.9 },
                             { 1.9, 2.5 },
                             { 2.5, 3.8 },
                                          });

void affiche(const Point& p);
void fit(const Point& p, line& d);
void line_value(const line& d, double x);
void affiche_line(const line& d);
double mse(const line& d, const Point& p);

int main() {

    line l;
    Point p;
    
    fit(p, l);
    cout << endl;
    double x(2.0);
    line_value(l, x);
    cout << endl;
    affiche_line(l);
    cout << endl;
    cout << "MSE = " << mse(l, p) << endl;

    return 0;
}

void affiche(const Point& p) {
    for(auto p : points) {
        cout << "x= " << p.x << " , y= " << p.y << endl;
    }
}

void fit(const Point& p, line& d) {
    double sum_x(0.0);
    double sum_y(0.0);
    
    cout << setw(10) << "x_i"
         << setw(10) << "y_i"
         << setw(10) << "sum x"
         << setw(10) << "sum y"
         << setw(10) << "x^2"
         << setw(10) << "xy"
         << endl;

    for(size_t i(0); i < points.size(); ++i) {
        sum_x += p.x;
        sum_y += p.y;
        
        d.a += (p.x * p.y - 1.0 / points.size() * sum_x * sum_y) /
               (p.x * p.x - 1.0 / points.size() * sum_x * sum_x);
        
        d.b += 1.0 / points.size() * (p.y - d.a * p.x);
        
        cout << setw(10) << p.x << setw(10) << p.y
             << setw(10) << sum_x << setw(10) << sum_y
             << setw(10) << p.x * p.x << setw(10) << p.x * p.y
             << endl;
    }
}

void line_value(const line& d, double x) {
    cout << "x= " << x << endl;
    cout << "y= " << d.a * x + d.b << endl;
}

void affiche_line(const line& d) {
    cout << "Parameter a= " << d.a << endl;
    cout << "Parameter b= " << d.b << endl;
}

double mse(const line& d, const Point& p) {
    double MSE(0.0);
    
    for(size_t i(0); i < points.size(); ++i){
        MSE += 1.0 / points.size() * pow(p.y - (d.a * p.x + d.b), 2.0);
    }
    return MSE;
}

I tried to initialize my vector of points in the main but it didn't work at all.

As I said im a beginner and i don't have any solution.

It is impossible to use the numbers in my vector to generate the values of a and b

What "doesn't work"? Your code compiles fine for me. – selbie Aug 19 '23 at 19:50 — selbie, Aug 19 '23 at 19:50

score -1 · Answer 1 · answered Aug 19 '23 at 19:51

Certainly, Frederick! The issue here is that you are not actually utilizing the points vector inside your fit and mse functions. You are only using the parameter p, which is a single Point structure, and not the vector of points.

Here's a concise fix for your code:

Modify the function signatures for fit and mse to accept the points vector.
Update the loop inside fit and mse to iterate over the points vector.

void fit(const vector<Point>& points, line& d); // Function signature change
double mse(const line& d, const vector<Point>& points); // Function signature change

int main() {
    //...
    fit(points, l); // Pass the points vector
    //...
    cout << "MSE = " << mse(l, points) << endl; // Pass the points vector
    //...
}

void fit(const vector<Point>& points, line& d) { // Function definition change
    //...
    for(size_t i(0); i < points.size(); ++i) {
        double x = points[i].x;
        double y = points[i].y;
        sum_x += x;
        sum_y += y;
        // Modify d.a and d.b calculations accordingly
        //...
    }
    //...
}

double mse(const line& d, const vector<Point>& points) { // Function definition change
    //...
    for(size_t i(0); i < points.size(); ++i){
        double x = points[i].x;
        double y = points[i].y;
        MSE += 1.0 / points.size() * pow(y - (d.a * x + d.b), 2.0);
    }
    return MSE;
}

These changes will make the code use the actual points in the vector to fit the line and calculate the mean squared error. Make sure to modify the formulas for a and b according to the correct linear regression calculations.

vector of struct in linear regression

1 Answers1