diff --git a/regression/code/errorGradient.m b/regression/code/errorGradient.m
index 2f3ec16..c31e684 100644
--- a/regression/code/errorGradient.m
+++ b/regression/code/errorGradient.m
@@ -1,13 +1,12 @@
-% x, y, slopes, and intercepts from exercise 8.3
+% x, y, slopes, intercepts, ii, ss, and error_surface from exercise 8.3
 
-error_surface = zeros(length(slopes), length(intercepts));
-gradient_m = zeros(size(error_surface));
-gradient_b = zeros(size(error_surface));
-
-for i = 1:length(slopes)
-    for j = 1:length(intercepts)
-        error_surface(i,j) = meanSquaredError(x, y, [slopes(i), intercepts(j)]);
-        grad = meanSquaredGradient(x, y, [slopes(i), intercepts(j)]);
+qslopes = 1.0:0.5:4.0;
+qintercepts = -150:50:150;
+gradient_m = zeros(length(qintercepts), length(qslopes));
+gradient_b = zeros(length(qintercepts), length(qslopes));
+for i = 1:length(qintercepts)
+    for j = 1:length(qslopes)
+        grad = meanSquaredGradient(x, y, [qslopes(j), qintercepts(i)]);
         gradient_m(i,j) = grad(1);
         gradient_b(i,j) = grad(2);
     end
@@ -15,13 +14,12 @@ end
 
 figure()
 hold on
-[N, M] = meshgrid(intercepts, slopes);
-%surface(M, N, error_surface, 'FaceAlpha', 0.5);
-contour(M, N, error_surface, 50);
-quiver(M, N, gradient_m, gradient_b)
+contour(ss, ii, error_surface, 70);
+[qss, qii] = meshgrid(qslopes, qintercepts);
+quiver(qss, qii, gradient_m, gradient_b, 0.01)
 xlabel('Slope m')
 ylabel('Intercept b')
 zlabel('Mean squared error')
-set(gcf, 'paperunits', 'centimeters', 'papersize', [15, 10.5], ...
-    'paperposition', [0., 0., 15, 10.5])
-saveas(gcf, 'error_gradient', 'pdf')
+%set(gcf, 'paperunits', 'centimeters', 'papersize', [15, 10.5], ...
+%    'paperposition', [0., 0., 15, 10.5])
+%saveas(gcf, 'error_gradient', 'pdf')
diff --git a/regression/code/errorSurface.m b/regression/code/errorSurface.m
index 33380ae..99b2087 100644
--- a/regression/code/errorSurface.m
+++ b/regression/code/errorSurface.m
@@ -1,27 +1,27 @@
 % generate data:
-m = 0.75;
+m = 3.0;
 b = -40.0;
 n = 20;
 x = 120.0*rand(n, 1);
 y = m*x + b + 15.0*randn(n, 1);
 
 % compute mean squared error for a range of slopes and intercepts:
-slopes = -5:0.25:5;
-intercepts = -30:1:30;
-error_surface = zeros(length(slopes), length(intercepts));
-for i = 1:length(slopes)
-    for j = 1:length(intercepts)
-        error_surf(i,j) = meanSquaredError(x, y, [slopes(i), intercepts(j)]);
+slopes = 0.0:0.1:5.0;
+intercepts = -200:10:200;
+error_surface = zeros(length(intercepts), length(slopes));
+for i = 1:length(intercepts)
+    for j = 1:length(slopes)
+        error_surface(i,j) = meanSquaredError(x, y, [slopes(j), intercepts(i)]);
     end
 end
 
 % plot the error surface:
 figure()
-[N,M] = meshgrid(intercepts, slopes);
-surface(M, N, error_surface);
+[ss, ii] = meshgrid(slopes, intercepts);
+surface(ss, ii, error_surface);
 xlabel('slope', 'rotation', 7.5)
 ylabel('intercept', 'rotation', -22.5)
-zlabel('error')
+zlabel('mean squared error')
 set(gca,'xtick', (-5:2.5:5))
 grid on
 view(3)
diff --git a/regression/code/gradientDescent.m b/regression/code/gradientDescent.m
index 1158b0b..1919035 100644
--- a/regression/code/gradientDescent.m
+++ b/regression/code/gradientDescent.m
@@ -1,13 +1,13 @@
 % x, y from exercise 8.3
 
 % some arbitrary values for the slope and the intercept to start with:
-position = [-2. 10.];
+position = [-2.0, 10.0];
 % gradient descent:
 gradient = [];
 errors = [];
 count = 1;
-eps = 0.01;
+eps = 0.0001;
 
 while isempty(gradient) || norm(gradient) > 0.1
     gradient = meanSquaredGradient(x, y, position);
     errors(count) = meanSquaredError(x, y, position);
diff --git a/regression/code/meanSquaredGradient.m b/regression/code/meanSquaredGradient.m
index b6bd0ad..b6ae548 100644
--- a/regression/code/meanSquaredGradient.m
+++ b/regression/code/meanSquaredGradient.m
@@ -8,9 +8,9 @@ function gradient = meanSquaredGradient(x, y, parameter)
 %
 % Returns: the gradient as a vector with two elements
 
-    h = 1e-6; % stepsize for derivatives
+    h = 1e-5; % stepsize for derivatives
     mse = meanSquaredError(x, y, parameter);
     partial_m = (meanSquaredError(x, y, [parameter(1)+h, parameter(2)]) - mse)/h;
-    partial_n = (meanSquaredError(x, y, [parameter(1), parameter(2)+h]) - mse)/h;
-    gradient = [partial_m, partial_n];
+    partial_b = (meanSquaredError(x, y, [parameter(1), parameter(2)+h]) - mse)/h;
+    gradient = [partial_m, partial_b];
 end
diff --git a/regression/lecture/regression-chapter.tex b/regression/lecture/regression-chapter.tex
index 4ee5414..e24239d 100644
--- a/regression/lecture/regression-chapter.tex
+++ b/regression/lecture/regression-chapter.tex
@@ -16,6 +16,14 @@
 
 \include{regression}
 
+\subsection{Start with one-dimensional problem!}
+\begin{itemize}
+\item Just the root mean square as a function of the slope
+\item 1-d gradient
+\item 1-d gradient descent
+\item Homework is to do the 2d problem!
+\end{itemize}
+
 \subsection{Linear fits}
 \begin{itemize}
 \item Polyfit is easy: unique solution!