function [p, ps, mses] = gradientdescent(t, x, func, p0, epsilon, threshold)
% Gradient descent for leas squares fit.
%
% Arguments: t, vector of time points.
%            x, vector of the corresponding measured data values.
%            p0, vector with initial parameter values.
%            func, function to be fit to the data. 
%            epsilon: factor multiplying the gradient.
%            threshold: minimum value for gradient
%
% Returns:   p, vector with the final estimates of the parameter values.
%            ps: 2D vector with all the parameter values traversed.
%            mses: vector with the corresponding mean squared errors
  p = p0;
  gradient = ones(size(p0)) * 1000.0;
  ps = [];
  mses = [];
  while norm(gradient) > threshold
      ps = [ps, p(:)];
      mses = [mses, meansquarederror(t, x, func, p)];
      gradient = msegradient(t, x, func, p);
      p = p - epsilon * gradient; 
  end
end

function mse = meansquarederror(t, x, func, p)
  mse = mean((x - func(t, p)).^2);
end

function gradmse = msegradient(t, x, func, p)
  gradmse = zeros(size(p));
  h = 1e-7;   % stepsize for derivative
  ph = eye(length(p))*h;  % ... for each parameter
  mse = meansquarederror(t, x, func, p);
  for i = 1:length(p)     % for each coordinate ...
    msepi = meansquarederror(t, x, func, p(:) + ph(:,i));
    gradmse(i) = (msepi - mse)/h;
  end
end