#!/usr/bin/env python3
"""
@authors: Yann & Sam'
"""
import random
import numpy as np
"""
A simple program to test some different methods to find out
a "good learning rate" for each iteration easily.
The whole program runs without Theano, because it used to be a period
when we couldn't get something to work with it...
"""
###############################################################################


# The squared-error cost function for a single training example
def cost(x, y, theta):
    return (np.dot(x, theta) - y) ** 2

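# For reference: with hypothesis h(x) = np.dot(x, theta), this computes
# J(theta) = (h(x) - y)^2 for a single example; the conventional 1/2
# factor only appears in costAverage below.
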
# Returns the average of the cost function over all training examples
def costAverage(coordinates, theta):
    average = 0.
    for x, y in coordinates:
        average += (np.dot(x, theta) - y)**2
    return average / (2. * len(coordinates))

# Simple gradient computation for a single example
def gradient(x, y, theta):
    return np.dot((np.dot(x, theta) - y), x)

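# This is the derivative of (np.dot(x, theta) - y)**2 / 2 with respect to
# theta; the conventional 1/2 cancels the factor 2 (see costAverage).
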
# Returns an "averaged" gradient computed over all training examples
def gradientAverage(coordinates, theta):
    average = 0.
    for x, y in coordinates:
        average += np.dot((np.dot(x, theta) - y), x)
    return average / len(coordinates)

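# Note: 'cost'/'gradient' are the per-example versions used by the 'ebe'
# mode in main(), while 'costAverage'/'gradientAverage' are the batch
# versions used by the 'avg' mode.
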
# Generate random coordinates which we'll use as training examples
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients, nbExamples,
                                intervalWidth, intervalValues):
    coordinates = []
    for i in range(nbExamples):
        X = [1]
        for j in range(nbParameters):
            X.append(random.uniform(*intervalValues))
        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))
        coordinates.append((X, y))
    return coordinates

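# For example, with nbParameters = 1 and leadingCoefficients = [1, 2], each
# example is ([1, x1], y) with y == 1 + 2 * x1 plus uniform noise of
# half-width intervalWidth (the leading 1 is the bias feature).
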
# This function updates alpha and theta for one iteration of
# the gradient descent.
# It uses the 'usedCost' (either 'cost' or 'costAverage') and the
# 'usedGradient' (either 'gradient' or 'gradientAverage') to perform
# the update.
# 'args' must be the list of arguments needed specifically by
# 'usedCost' and 'usedGradient' (meaning theta is not included
# in that list).
# 'flag' selects the method used for updating alpha.
def update(flag, usedCost, usedGradient, args, theta, thetaOld, alpha):
    theta = theta - alpha * usedGradient(*args, theta)
    if flag == 'try':
        # Halve alpha when the last step didn't decrease the cost
        if usedCost(*args, thetaOld) <= usedCost(*args, theta):
            alpha = alpha * 0.5
        thetaOld = theta
    elif flag == 'Wolfe':
        grad = usedGradient(*args, theta)
        gradNext = usedGradient(*args, theta - alpha * grad)
        Jtheta = usedCost(*args, theta)
        JthetaNext = usedCost(*args, theta - alpha * grad)
        if -np.dot(grad, gradNext) >= -0.01 * np.dot(grad, grad) and \
                JthetaNext <= Jtheta - 0.0001 * alpha * np.dot(grad, grad):
            alpha = alpha * 0.5
    elif flag == 'B&B':
        grad = usedGradient(*args, theta)
        deltaGrad = usedGradient(*args, theta - alpha * grad) - grad
        if np.dot(-alpha * grad, deltaGrad) != 0 and \
                np.dot(deltaGrad, deltaGrad) != 0:
            alpha = np.dot(-alpha * grad, deltaGrad) / np.dot(deltaGrad,
                                                              deltaGrad)
    return alpha, theta

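# A quick summary of the three alpha-update strategies (for reference):
#   - 'try':   halve alpha whenever the last step failed to decrease the cost.
#   - 'Wolfe': check the Armijo sufficient-decrease condition (c1 = 0.0001)
#              and the curvature condition (c2 = 0.01), and halve alpha when
#              both hold.
#   - 'B&B':   recompute alpha with what appears to be the second
#              Barzilai-Borwein formula, alpha = (s . dg) / (dg . dg), where
#              s = -alpha * grad is the last step taken and dg is the
#              corresponding change in gradient.
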
def main():
    # ############################## Parameters ###############################
    nbIterations = 100
    nbParameters = 1
    leadingCoefficients = [1, 2]
    nbExamples = 25
    intervalWidth = 0
    intervalValues = -10, 10
    # #########################################################################

    # ################################# Begin #################################
    coordinates = getRandomCoordinatesVectors(
        nbParameters, leadingCoefficients, nbExamples,
        intervalWidth, intervalValues)

    alpha = 0.1
    theta = np.array([49., 27.])
    thetaOld = np.array([0.1, 0.1])

    # Hard-coded for testing (change this if needed).
    flagMethod = 'B&B'
    flagGradient = 'avg'
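    # With these settings (intervalWidth = 0, so the data is noiseless),
    # theta should converge towards leadingCoefficients, here roughly [1., 2.].
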
    for i in range(nbIterations):
        if flagGradient == 'avg':  # 'avg' as in 'Average'
            alpha, theta = update(flagMethod, costAverage, gradientAverage,
                                  [coordinates], theta, thetaOld, alpha)
            thetaOld = theta
        elif flagGradient == 'ebe':  # 'ebe' as in 'example by example'
            for x, y in coordinates:
                alpha, theta = update(flagMethod, cost, gradient,
                                      [x, y], theta, thetaOld, alpha)
                thetaOld = theta
        if alpha == 0.:
            break
        # Print theta at each iteration to watch the convergence
        print(theta)
    # ################################## End ##################################


if __name__ == '__main__':
    main()