Visualization of Gradient Descent Optimization With Momentum

Finally, we can visualize the progress of the gradient descent optimization algorithm with momentum.

The complete example is listed below.

# example of plotting gradient descent with momentum for a one-dimensional function
from numpy import asarray
from numpy import arange
from numpy.random import rand
from numpy.random import seed
from matplotlib import pyplot

# objective function
def objective(x):
    return x**2.0

# derivative of objective function
def derivative(x):
    return x * 2.0

# gradient descent algorithm
def gradient_descent(objective, derivative, bounds, n_iter, step_size, momentum):
    # track all solutions
    solutions, scores = list(), list()
    # generate an initial point
    solution = bounds[:, 0] + rand(len(bounds)) * (bounds[:, 1] - bounds[:, 0])
    # keep track of the change
    change = 0.0
    # run the gradient descent
    for i in range(n_iter):
        # calculate gradient
        gradient = derivative(solution)
        # calculate update
        new_change = step_size * gradient + momentum * change
        # take a step
        solution = solution - new_change
        # save the change
        change = new_change
        # evaluate candidate point
        solution_eval = objective(solution)
        # store solution
        solutions.append(solution)
        scores.append(solution_eval)
        # report progress
        print('>%d f(%s) = %.5f' % (i, solution, solution_eval))
    return [solutions, scores]

# seed the pseudo random number generator
seed(4)
# define range for input
bounds = asarray([[-1.0, 1.0]])
# define the total iterations
n_iter = 30
# define the step size
step_size = 0.1
# define momentum
momentum = 0.3
# perform the gradient descent search with momentum
solutions, scores = gradient_descent(objective, derivative, bounds, n_iter, step_size, momentum)
# sample input range uniformly at 0.1 increments
inputs = arange(bounds[0,0], bounds[0,1]+0.1, 0.1)
# compute targets
results = objective(inputs)
# create a line plot of input vs result
pyplot.plot(inputs, results)
# plot the solutions found
pyplot.plot(solutions, scores, '.-', color='red')
# show the plot
pyplot.show()

Running the example performs the gradient descent search with momentum on the objective function as before, except in this case, each point found during the search is plotted.

Note: Your results may vary given the stochastic nature of the algorithm or evaluation procedure, or differences in numerical precision. Consider running the example a few times and comparing the average outcome.
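To make the effect of momentum easier to see on the plot, it can help to overlay a second search that uses no momentum. The following is a minimal sketch of that idea, assuming the same objective function, derivative, and gradient_descent() implementation as the complete example above; the momentum=0.0 baseline, the colors, and the labels are illustrative choices and not part of the original example.

# sketch: overlay searches with and without momentum for visual comparison
# (the momentum=0.0 baseline and the labels are illustrative assumptions)
from numpy import asarray
from numpy import arange
from numpy.random import rand
from numpy.random import seed
from matplotlib import pyplot

# objective function
def objective(x):
    return x**2.0

# derivative of objective function
def derivative(x):
    return x * 2.0

# gradient descent with momentum (momentum=0.0 gives plain gradient descent)
def gradient_descent(objective, derivative, bounds, n_iter, step_size, momentum):
    solutions, scores = list(), list()
    solution = bounds[:, 0] + rand(len(bounds)) * (bounds[:, 1] - bounds[:, 0])
    change = 0.0
    for i in range(n_iter):
        new_change = step_size * derivative(solution) + momentum * change
        solution = solution - new_change
        change = new_change
        solutions.append(solution)
        scores.append(objective(solution))
    return solutions, scores

# define range for input and plot the objective function
bounds = asarray([[-1.0, 1.0]])
inputs = arange(bounds[0,0], bounds[0,1]+0.1, 0.1)
pyplot.plot(inputs, objective(inputs))
# run the search with momentum from a fixed starting point
seed(4)
sol_m, sco_m = gradient_descent(objective, derivative, bounds, 30, 0.1, 0.3)
# re-seed so the baseline search starts from the same initial point
seed(4)
sol_0, sco_0 = gradient_descent(objective, derivative, bounds, 30, 0.1, 0.0)
# overlay both search paths on the plot of the objective function
pyplot.plot(sol_m, sco_m, '.-', color='red', label='momentum=0.3')
pyplot.plot(sol_0, sco_0, '.-', color='blue', label='momentum=0.0')
pyplot.legend()
pyplot.show()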