#!/usr/bin/env python3

"""
@authors: Yann & Sam'
"""

from math import sqrt

import matplotlib.pyplot as plt
import numpy as np

"""
|
|
Example of gradient descent of the Rosenbrock function (based on Cauchy method)
|
|
|--> Without Theano !
|
|
|--> That's one of the answer to: "How to find out the best learning rate ?"
|
|
"""
|
|
|
|
|
|
###############################################################################
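
# For reference: the Rosenbrock function minimized below is
#   f(x, y) = (x - 1)**2 + p * (x**2 - y)**2
# For any 'p > 0' its single global minimum is at (x, y) = (1, 1), where
# f(1, 1) = 0, so the descent should converge towards that point.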

# This function returns the "path" of the descent: at each step, it tests
# every learning rate in 'T' and keeps the one that reaches the lowest value
# of the Rosenbrock function
def graDescent(x_start, y_start, T, p):

    # Just returns the value of the Rosenbrock function at a given point
    def rosenbrockFunction(x_y, p):

        # Let's unpack the coordinates tuple
        x, y = x_y

        return (x - 1)**2 + p * (x**2 - y)**2
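
    # e.g. rosenbrockFunction((1.0, 1.0), 10) returns 0.0, the function's
    # value at its global minimum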

    # Returns the learning rate that produced the lowest value of the
    # Rosenbrock function during the descent step
    def bestLearningRate(T, X_Y, p):

        L = []

        # Now let's fill in the list with the value of the Rosenbrock
        # function at each candidate point
        for i in range(0, len(X_Y)):

            L.append(rosenbrockFunction(X_Y[i], p))

        # We just have to return the learning rate present at the index of
        # the minimum of 'L'!
        return T[L.index(min(L))]
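
    # This amounts to a line search over a fixed grid of step sizes; a
    # minimal NumPy equivalent (illustrative only, not used by this script):
    #
    #   values = np.array([rosenbrockFunction(pt, p) for pt in X_Y])
    #   best_t = T[np.argmin(values)]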

    # Just computes the gradient of the Rosenbrock function at the point we
    # pass as parameter:
    #   df/dx = 2 * (x - 1) + 4 * p * x * (x**2 - y)
    #   df/dy = -2 * p * (x**2 - y)
    def gradient(x, y, p):

        return (2 * (x - 1) + 4 * p * x * (x**2 - y), -2 * p * (x**2 - y))
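
    # A quick sanity check for the analytic gradient is a central finite
    # difference; a minimal sketch ('h' is an arbitrary step size, not part
    # of the original script):
    #
    #   h = 1e-6
    #   num_gx = (rosenbrockFunction((x + h, y), p)
    #             - rosenbrockFunction((x - h, y), p)) / (2 * h)
    #   num_gy = (rosenbrockFunction((x, y + h), p)
    #             - rosenbrockFunction((x, y - h), p)) / (2 * h)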

    # Will contain each point we'll effectively pass through during the
    # descent
    P = []

    # Initial conditions (coordinates of the descent's starting point)
    x, y = (x_start, y_start)

    # We add the starting point as the beginning of the descent
    P.append((x, y))

    # In this loop, we keep computing the gradient descent while the norm of
    # the gradient is greater than '0.01'.
    # This allows the program to stop itself when the descent reaches a
    # minimum.
    while True:

        # Will contain the coordinates of each point we'll pass through
        # during the learning-rate test
        X_Y = []

        # Here we get the last point where we stopped the descent
        x_p, y_p = P[-1]

        # We compute the gradient at this point
        g_x, g_y = gradient(x_p, y_p, p)

        # Two new variables to compute the descent without altering the
        # other ones
        x, y = x_p, y_p

        # In this loop, we'll try one descent step per candidate learning
        # rate, and we'll keep the best one at the end
        for t in T:

            # Gradient descent step!
            x = x_p - t * g_x
            y = y_p - t * g_y

            # We add the new coordinates to the list of candidates
            X_Y.append((x, y))

        # Let's get the learning rate which gave us the lowest value reached
        # during this step
        min_t = bestLearningRate(T, X_Y, p)

        # Let's add the best point reached to the "path" of the descent
        P.append((x_p - min_t * g_x, y_p - min_t * g_y))

        if sqrt(g_x**2 + g_y**2) < 0.01:

            break

    return P
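
# A minimal standalone call, mirroring the parameters used in 'main' below:
#
#   path = graDescent(1.5, 0.5, np.arange(0.0, 1.0, 0.01), 10)
#   print(path[-1])   # last point of the descent, near the minimum (1, 1)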


def promptPath(path):

    # The list of tuples becomes here a list of two lists, as:
    # [[x0, ..., xn], [y0, ..., yn]]
    L = list(map(list, zip(*path)))

    # New figure for the plot!
    plt.figure("Gradient descent of Rosenbrock's function")

    # Adding the path to the figure
    plt.plot(L[0], L[1], 'b')

    plt.show(block=True)
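
# To see the descent against the function's landscape, one could overlay
# contour lines before plotting the path; a minimal sketch (the grid bounds
# below are arbitrary choices, not part of the original script):
#
#   xs, ys = np.meshgrid(np.linspace(-2, 2, 400), np.linspace(-1, 3, 400))
#   zs = (xs - 1)**2 + 10 * (xs**2 - ys)**2
#   plt.contour(xs, ys, zs, levels=50)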


def main():

    # ############################## Parameters ##############################

    # Rosenbrock's parameter
    p = 10

    # Starting point of the descent
    x_start = 1.5
    y_start = 0.5

    # The range of learning rates we'll test during the descent
    minLR = 0.0
    maxLR = 1.0
    incrementLR = 0.01

    ###########################################################################

    # ################################# Begin #################################

    # Will contain each learning rate we'll test
    T = np.arange(minLR, maxLR, incrementLR)
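
    # NB: 'np.arange' excludes the upper bound, so 'T' here holds the 100
    # rates 0.00, 0.01, ..., 0.99; the 't = 0' candidate simply leaves the
    # point unchanged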

    # The effective descent path
    path = graDescent(x_start, y_start, T, p)

    # Let's plot this path
    promptPath(path)

    # ################################## End ##################################


if __name__ == '__main__':

    main()