Next: , Previous: Troubleshooting, Up: Least-Squares Fitting   [Index]

### 37.7 Examples

The following program computes a least squares straight-line fit to a simple dataset, and outputs the best-fit line and its associated one standard-deviation error bars.

```#include <stdio.h>
#include <gsl/gsl_fit.h>

int
main (void)
{
int i, n = 4;
double x[4] = { 1970, 1980, 1990, 2000 };
double y[4] = {   12,   11,   14,   13 };
double w[4] = {  0.1,  0.2,  0.3,  0.4 };

double c0, c1, cov00, cov01, cov11, chisq;

gsl_fit_wlinear (x, 1, w, 1, y, 1, n,
&c0, &c1, &cov00, &cov01, &cov11,
&chisq);

printf ("# best fit: Y = %g + %g X\n", c0, c1);
printf ("# covariance matrix:\n");
printf ("# [ %g, %g\n#   %g, %g]\n",
cov00, cov01, cov01, cov11);
printf ("# chisq = %g\n", chisq);

for (i = 0; i < n; i++)
printf ("data: %g %g %g\n",
x[i], y[i], 1/sqrt(w[i]));

printf ("\n");

for (i = -30; i < 130; i++)
{
double xf = x[0] + (i/100.0) * (x[n-1] - x[0]);
double yf, yf_err;

gsl_fit_linear_est (xf,
c0, c1,
cov00, cov01, cov11,
&yf, &yf_err);

printf ("fit: %g %g\n", xf, yf);
printf ("hi : %g %g\n", xf, yf + yf_err);
printf ("lo : %g %g\n", xf, yf - yf_err);
}
return 0;
}
```

The following commands extract the data from the output of the program and display it using the GNU plotutils `graph` utility,

```\$ ./demo > tmp
\$ more tmp
# best fit: Y = -106.6 + 0.06 X
# covariance matrix:
# [ 39602, -19.9
#   -19.9, 0.01]
# chisq = 0.8

\$ for n in data fit hi lo ;
do
grep "^\$n" tmp | cut -d: -f2 > \$n ;
done
\$ graph -T X -X x -Y y -y 0 20 -m 0 -S 2 -Ie data
-S 0 -I a -m 1 fit -m 2 hi -m 2 lo
```

The next program performs a quadratic fit y = c_0 + c_1 x + c_2 x^2 to a weighted dataset using the generalised linear fitting function `gsl_multifit_wlinear`. The model matrix X for a quadratic fit is given by,

```X = [ 1   , x_0  , x_0^2 ;
1   , x_1  , x_1^2 ;
1   , x_2  , x_2^2 ;
... , ...  , ...   ]
```

where the column of ones corresponds to the constant term c_0. The two remaining columns corresponds to the terms c_1 x and c_2 x^2.

The program reads n lines of data in the format (x, y, err) where err is the error (standard deviation) in the value y.

```#include <stdio.h>
#include <gsl/gsl_multifit.h>

int
main (int argc, char **argv)
{
int i, n;
double xi, yi, ei, chisq;
gsl_matrix *X, *cov;
gsl_vector *y, *w, *c;

if (argc != 2)
{
fprintf (stderr,"usage: fit n < data\n");
exit (-1);
}

n = atoi (argv[1]);

X = gsl_matrix_alloc (n, 3);
y = gsl_vector_alloc (n);
w = gsl_vector_alloc (n);

c = gsl_vector_alloc (3);
cov = gsl_matrix_alloc (3, 3);

for (i = 0; i < n; i++)
{
int count = fscanf (stdin, "%lg %lg %lg",
&xi, &yi, &ei);

if (count != 3)
{
fprintf (stderr, "error reading file\n");
exit (-1);
}

printf ("%g %g +/- %g\n", xi, yi, ei);

gsl_matrix_set (X, i, 0, 1.0);
gsl_matrix_set (X, i, 1, xi);
gsl_matrix_set (X, i, 2, xi*xi);

gsl_vector_set (y, i, yi);
gsl_vector_set (w, i, 1.0/(ei*ei));
}

{
gsl_multifit_linear_workspace * work
= gsl_multifit_linear_alloc (n, 3);
gsl_multifit_wlinear (X, w, y, c, cov,
&chisq, work);
gsl_multifit_linear_free (work);
}

#define C(i) (gsl_vector_get(c,(i)))
#define COV(i,j) (gsl_matrix_get(cov,(i),(j)))

{
printf ("# best fit: Y = %g + %g X + %g X^2\n",
C(0), C(1), C(2));

printf ("# covariance matrix:\n");
printf ("[ %+.5e, %+.5e, %+.5e  \n",
COV(0,0), COV(0,1), COV(0,2));
printf ("  %+.5e, %+.5e, %+.5e  \n",
COV(1,0), COV(1,1), COV(1,2));
printf ("  %+.5e, %+.5e, %+.5e ]\n",
COV(2,0), COV(2,1), COV(2,2));
printf ("# chisq = %g\n", chisq);
}

gsl_matrix_free (X);
gsl_vector_free (y);
gsl_vector_free (w);
gsl_vector_free (c);
gsl_matrix_free (cov);

return 0;
}
```

A suitable set of data for fitting can be generated using the following program. It outputs a set of points with gaussian errors from the curve y = e^x in the region 0 < x < 2.

```#include <stdio.h>
#include <math.h>
#include <gsl/gsl_randist.h>

int
main (void)
{
double x;
const gsl_rng_type * T;
gsl_rng * r;

gsl_rng_env_setup ();

T = gsl_rng_default;
r = gsl_rng_alloc (T);

for (x = 0.1; x < 2; x+= 0.1)
{
double y0 = exp (x);
double sigma = 0.1 * y0;
double dy = gsl_ran_gaussian (r, sigma);

printf ("%g %g %g\n", x, y0 + dy, sigma);
}

gsl_rng_free(r);

return 0;
}
```

The data can be prepared by running the resulting executable program,

```\$ GSL_RNG_TYPE=mt19937_1999 ./generate > exp.dat
\$ more exp.dat
0.1 0.97935 0.110517
0.2 1.3359 0.12214
0.3 1.52573 0.134986
0.4 1.60318 0.149182
0.5 1.81731 0.164872
0.6 1.92475 0.182212
....
```

To fit the data use the previous program, with the number of data points given as the first argument. In this case there are 19 data points.

```\$ ./fit 19 < exp.dat
0.1 0.97935 +/- 0.110517
0.2 1.3359 +/- 0.12214
...
# best fit: Y = 1.02318 + 0.956201 X + 0.876796 X^2
# covariance matrix:
[ +1.25612e-02, -3.64387e-02, +1.94389e-02
-3.64387e-02, +1.42339e-01, -8.48761e-02
+1.94389e-02, -8.48761e-02, +5.60243e-02 ]
# chisq = 23.0987
```

The parameters of the quadratic fit match the coefficients of the expansion of e^x, taking into account the errors on the parameters and the O(x^3) difference between the exponential and quadratic functions for the larger values of x. The errors on the parameters are given by the square-root of the corresponding diagonal elements of the covariance matrix. The chi-squared per degree of freedom is 1.4, indicating a reasonable fit to the data.

The next program demonstrates the advantage of robust least squares on a dataset with outliers. The program generates linear (x,y) data pairs on the line y = 1.45 x + 3.88, adds some random noise, and inserts 3 outliers into the dataset. Both the robust and ordinary least squares (OLS) coefficients are computed for comparison.

```#include <stdio.h>
#include <gsl/gsl_multifit.h>
#include <gsl/gsl_randist.h>

int
dofit(const gsl_multifit_robust_type *T,
const gsl_matrix *X, const gsl_vector *y,
gsl_vector *c, gsl_matrix *cov)
{
int s;
gsl_multifit_robust_workspace * work
= gsl_multifit_robust_alloc (T, X->size1, X->size2);

s = gsl_multifit_robust (X, y, c, cov, work);
gsl_multifit_robust_free (work);

return s;
}

int
main (int argc, char **argv)
{
int i;
size_t n;
const size_t p = 2; /* linear fit */
gsl_matrix *X, *cov;
gsl_vector *x, *y, *c, *c_ols;
const double a = 1.45; /* slope */
const double b = 3.88; /* intercept */
gsl_rng *r;

if (argc != 2)
{
fprintf (stderr,"usage: robfit n\n");
exit (-1);
}

n = atoi (argv[1]);

X = gsl_matrix_alloc (n, p);
x = gsl_vector_alloc (n);
y = gsl_vector_alloc (n);

c = gsl_vector_alloc (p);
c_ols = gsl_vector_alloc (p);
cov = gsl_matrix_alloc (p, p);

r = gsl_rng_alloc(gsl_rng_default);

/* generate linear dataset */
for (i = 0; i < n - 3; i++)
{
double dx = 10.0 / (n - 1.0);
double ei = gsl_rng_uniform(r);
double xi = -5.0 + i * dx;
double yi = a * xi + b;

gsl_vector_set (x, i, xi);
gsl_vector_set (y, i, yi + ei);
}

/* add a few outliers */
gsl_vector_set(x, n - 3, 4.7);
gsl_vector_set(y, n - 3, -8.3);

gsl_vector_set(x, n - 2, 3.5);
gsl_vector_set(y, n - 2, -6.7);

gsl_vector_set(x, n - 1, 4.1);
gsl_vector_set(y, n - 1, -6.0);

/* construct design matrix X for linear fit */
for (i = 0; i < n; ++i)
{
double xi = gsl_vector_get(x, i);

gsl_matrix_set (X, i, 0, 1.0);
gsl_matrix_set (X, i, 1, xi);
}

/* perform robust and OLS fit */
dofit(gsl_multifit_robust_ols, X, y, c_ols, cov);
dofit(gsl_multifit_robust_bisquare, X, y, c, cov);

/* output data and model */
for (i = 0; i < n; ++i)
{
double xi = gsl_vector_get(x, i);
double yi = gsl_vector_get(y, i);
gsl_vector_view v = gsl_matrix_row(X, i);
double y_ols, y_rob, y_err;

gsl_multifit_robust_est(&v.vector, c, cov, &y_rob, &y_err);
gsl_multifit_robust_est(&v.vector, c_ols, cov, &y_ols, &y_err);

printf("%g %g %g %g\n", xi, yi, y_rob, y_ols);
}

#define C(i) (gsl_vector_get(c,(i)))
#define COV(i,j) (gsl_matrix_get(cov,(i),(j)))

{
printf ("# best fit: Y = %g + %g X\n",
C(0), C(1));

printf ("# covariance matrix:\n");
printf ("# [ %+.5e, %+.5e\n",
COV(0,0), COV(0,1));
printf ("#   %+.5e, %+.5e\n",
COV(1,0), COV(1,1));
}

gsl_matrix_free (X);
gsl_vector_free (x);
gsl_vector_free (y);
gsl_vector_free (c);
gsl_vector_free (c_ols);
gsl_matrix_free (cov);
gsl_rng_free(r);

return 0;
}
```

The output from the program is shown in the following plot.

Next: , Previous: Troubleshooting, Up: Least-Squares Fitting   [Index]