#!/usr/bin/env python3

"""
@authors: Yann & Sam'
"""

from math import sqrt

import matplotlib.pyplot as plt

import numpy as np

"""
Example of gradient descent on the Rosenbrock function (based on Cauchy's method)
|--> Without Theano!
|--> That's one answer to: "How do you find the best learning rate?"
"""
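# For reference, the Rosenbrock variant used throughout this script is
#   f(x, y) = (x - 1)**2 + p * (x**2 - y)**2
# whose gradient is
#   df/dx = 2 * (x - 1) + 4 * p * x * (x**2 - y)
#   df/dy = -2 * p * (x**2 - y)
# and whose unique minimum is f(1, 1) = 0 for any positive 'p'.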

###############################################################################


# This function returns the optimum "path" of the descent, determined by
#   computing, at each step, the gradient and then the learning rate in 'T'
#   that gives the lowest value of the Rosenbrock function (Cauchy's method)
def graDescent(x_start, y_start, T, p):

    # Just returns the value of the Rosenbrock function at a given point
    def rosenbrockFunction(x_y, p):

        # Let's unpack the coordinates tuple
        x, y = x_y

        return (x - 1)**2 + p * (x**2 - y)**2

    # Returns the learning rate that led to the lowest value of the Rosenbrock
    #   function among the candidate points reached during this step
    def bestLearningRate(T, X_Y, p):

        L = []

        # Now let's fill the list with the value of Rosenbrock's function at
        #   each candidate point
        for i in range(0, len(X_Y)):

            L.append(rosenbrockFunction(X_Y[i], p))

        # We just have to return the learning rate at the index of the
        #   minimum of 'L'!
        return T[L.index(min(L))]
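        # An equivalent, more numpy-flavoured one-liner (just a sketch) would be:
        #   return T[int(np.argmin([rosenbrockFunction(xy, p) for xy in X_Y]))]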

    # Just computes the gradient of the Rosenbrock function at the point we
    #   pass as parameter (the factor 4 * p keeps it consistent with
    #   'rosenbrockFunction' for any 'p')
    def gradient(x, y, p):

        return (2 * (x - 1) + 4 * p * x * (x**2 - y), -2 * p * (x**2 - y))

    # Will contain each point we effectively pass through during the descent
    P = []

    # Initial conditions (coordinates of the starting point of the descent)
    x, y = (x_start, y_start)

    # We add the starting point as the beginning of the descent
    P.append((x, y))

    # In this loop, we keep computing gradient steps while the norm of the
    #   gradient is greater than '0.01'.
    # This lets the program stop by itself when the descent reaches a minimum.
    while True:

        # Will contain the coordinates of each candidate point we'll reach
        #   while testing the learning rates
        X_Y = []

        # Here we get the last point where we stopped the descent
        x_p, y_p = P[-1]

        # We compute the gradient at this point
        g_x, g_y = gradient(x_p, y_p, p)

        # Two new variables to compute the candidate steps without altering
        #   the other ones
        x, y = x_p, y_p

        # In this loop, we try one descent step for each learning rate,
        #   and we'll keep the best one at the end
        for t in T:

            # Gradient descent step!
            x = x_p - t * g_x
            y = y_p - t * g_y

            # We add the new coordinates to the list of candidate points
            X_Y.append((x, y))

        # Let's get the learning rate which gave us the lowest point reached
        #   during this step
        min_t = bestLearningRate(T, X_Y, p)

        # Let's add that best point to the "path" of the descent
        P.append((x_p - min_t * g_x, y_p - min_t * g_y))

        # Stop once the norm of the gradient falls below the threshold
        if sqrt(g_x**2 + g_y**2) < 0.01:

            break

    return P

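# A minimal usage sketch of 'graDescent' (these values simply mirror the
#   defaults set in 'main'; they are illustrative only):
#
#   T = np.arange(0.0, 1.0, 0.01)
#   path = graDescent(1.5, 0.5, T, 10)
#   print(path[-1])    # should end up close to the minimum (1, 1)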

def promptPath(path):

    # Here the list of tuples becomes a list of two lists:
    #   [[x0, ..., xn], [y0, ..., yn]]
    L = list(map(list, zip(*path)))

    # New figure for the plot!
    plt.figure("Gradient descent of Rosenbrock's function")

    # Adding the path to the figure
    plt.plot(L[0], L[1], 'b')

    plt.show(block=True)
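

# Optional variant (a sketch, not called anywhere in this script): drawing the
#   same path over the contour lines of the Rosenbrock function makes the
#   zig-zag of the line search easier to see. The grid bounds and contour
#   levels below are arbitrary choices.
def promptPathOnContours(path, p):

    L = list(map(list, zip(*path)))

    # Evaluation grid around the starting point and the minimum (1, 1)
    x = np.linspace(-0.5, 2.0, 400)
    y = np.linspace(-0.5, 2.0, 400)
    X, Y = np.meshgrid(x, y)
    Z = (X - 1)**2 + p * (X**2 - Y)**2

    plt.figure("Descent path over the Rosenbrock contours")
    plt.contour(X, Y, Z, levels=np.logspace(-2, 3, 15))
    plt.plot(L[0], L[1], 'b')

    plt.show(block=True)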


def main():

    # ############################## Parameters ##############################

    # Rosenbrock's parameter
    p = 10

    # Starting point of the descent
    x_start = 1.5
    y_start = 0.5

    # Range of learning rates we'll test during the descent
    minLR = 0.0
    maxLR = 1.0
    incrementLR = 0.01

    ###########################################################################

    # ################################# Begin #################################

    # Will contain each learning rate we'll test
    T = np.arange(minLR, maxLR, incrementLR)
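    # Note that 'np.arange' includes minLR = 0.0 but excludes maxLR = 1.0, so
    #   T = [0.0, 0.01, ..., 0.99]. Since a learning rate of 0.0 leaves the
    #   point unchanged, the step chosen by 'bestLearningRate' can never
    #   increase the value of the Rosenbrock function.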

    # The effective descent path
    path = graDescent(x_start, y_start, T, p)

    # Let's display this path
    promptPath(path)

    # ################################## End ##################################


if __name__ == '__main__':
    main()
