182 lines
4.7 KiB
Python
Executable File
182 lines
4.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
|
|
"""
|
|
@authors: Yann & Sam'
|
|
"""
|
|
|
|
|
|
import random
|
|
|
|
import numpy as np
|
|
|
|
|
|
"""
|
|
A simple program to test some different methods to find out
|
|
a "good learning rate" for each iteration easily.
|
|
The whole program runs without Theano, because there was a period
|
|
when we couldn't get something to work with it...
|
|
"""
|
|
|
|
|
|
###############################################################################
|
|
|
|
|
|
# That's the cost function
|
|
def cost(x, y, theta):
    """Return the squared error of the linear model on a single example.

    x: feature vector of the example.
    y: target value of the example.
    theta: parameter vector of the model.
    """
    residual = np.dot(x, theta) - y
    return residual * residual
|
|
|
|
|
|
# Returns half the average of the squared errors over all the examples
|
|
def costAverage(coordinates, theta):
    """Return half the mean squared error of the model over all examples.

    coordinates: sequence of (x, y) pairs, x being a feature vector and
                 y the matching target value.
    theta: parameter vector of the model.
    """
    squaredErrors = 0.
    for features, target in coordinates:
        residual = np.dot(features, theta) - target
        squaredErrors = squaredErrors + residual ** 2

    nbExamples = len(coordinates)
    return squaredErrors / (2. * nbExamples)
|
|
|
|
|
|
# Simple gradient computing
|
|
def gradient(x, y, theta):
    """Return the residual of a single example multiplied by its features.

    x: feature vector of the example.
    y: target value of the example.
    theta: parameter vector of the model.
    """
    residual = np.dot(x, theta) - y
    return np.dot(residual, x)
|
|
|
|
|
|
# Returns the gradient averaged over all the training examples
|
|
def gradientAverage(coordinates, theta):
    """Return the per-example gradient averaged over all examples.

    coordinates: sequence of (x, y) pairs, x being a feature vector and
                 y the matching target value.
    theta: parameter vector of the model.
    """
    accumulated = 0.
    for features, target in coordinates:
        residual = np.dot(features, theta) - target
        accumulated = accumulated + np.dot(residual, features)

    return accumulated / len(coordinates)
|
|
|
|
|
|
# Generate random coordinates which we'll use as training examples
|
|
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients, nbExamples,
                                intervalWidth, intervalValues):
    """Build a list of random (X, y) training examples.

    Each X starts with a constant 1 followed by 'nbParameters' values drawn
    uniformly from 'intervalValues' (a (low, high) pair). The matching y is
    the dot product of X with 'leadingCoefficients', plus a uniform noise
    drawn in [-abs(intervalWidth), abs(intervalWidth)].

    Returns a list of 'nbExamples' (X, y) tuples.
    """
    examples = []

    for _ in range(nbExamples):
        # Leading 1, then the randomly drawn features.
        features = [1] + [random.uniform(*intervalValues)
                          for _ in range(nbParameters)]

        noiseBound = abs(intervalWidth)
        target = np.dot(features, leadingCoefficients) + \
            random.uniform(-noiseBound, noiseBound)

        examples.append((features, target))

    return examples
|
|
|
|
|
|
# This function updates alpha and theta for each iteration of
|
|
# the gradient descent.
|
|
# It will use the 'usedCost' (either 'cost' or 'costAverage') and the
|
|
# 'usedGradient' (either 'gradient' or 'gradientAverage') to perform
|
|
# the update.
|
|
# 'args' must correspond to the list of arguments needed specifically for
|
|
# the 'usedCost' or the 'usedGradient' (meaning theta is not included
|
|
# in that list).
|
|
# The 'flag' corresponds to the used method for updating alpha.
|
|
def update(flag, usedCost, usedGradient, args, theta, thetaOld, alpha):
    """Perform one gradient-descent step, then adapt the learning rate.

    flag: method used to update alpha ('try', 'Wolfe' or 'B&B'); with any
          other value alpha is returned unchanged.
    usedCost: cost function, called as usedCost(*args, theta).
    usedGradient: gradient function, called as usedGradient(*args, theta).
    args: list of the arguments needed by 'usedCost' and 'usedGradient',
          theta excluded.
    theta: current parameter vector.
    thetaOld: previous parameter vector (only read by the 'try' method).
    alpha: current learning rate.

    Returns the tuple (alpha, theta): the learning rate to use for the next
    step and the parameters after this step.
    """
    # The step itself always uses the incoming alpha; the adaptation below
    # only affects the value handed back for the next call.
    theta = theta - alpha * usedGradient(*args, theta)

    if flag == 'try':
        # Halve alpha when the step did not strictly decrease the cost.
        if usedCost(*args, thetaOld) <= usedCost(*args, theta):
            alpha = alpha * 0.5

    elif flag == 'Wolfe':
        grad = usedGradient(*args, theta)
        thetaNext = theta - alpha * grad
        gradNext = usedGradient(*args, thetaNext)
        Jtheta = usedCost(*args, theta)
        JthetaNext = usedCost(*args, thetaNext)
        gradSquared = np.dot(grad, grad)

        # NOTE(review): alpha is halved when BOTH conditions hold, which
        # looks inverted for a backtracking scheme -- confirm the intent.
        curvatureHolds = -np.dot(grad, gradNext) >= -0.01 * gradSquared
        decreaseHolds = JthetaNext <= Jtheta - 0.0001 * alpha * gradSquared
        if curvatureHolds and decreaseHolds:
            alpha = alpha * 0.5

    elif flag == 'B&B':
        # Presumably a Barzilai-Borwein style step size: <s, d> / <d, d>
        # with s = -alpha * grad and d the change of gradient along s.
        grad = usedGradient(*args, theta)
        deltaGrad = usedGradient(*args, theta - alpha * grad) - grad
        numerator = np.dot(-alpha * grad, deltaGrad)
        denominator = np.dot(deltaGrad, deltaGrad)

        # Keep the current alpha rather than dividing by zero or
        # collapsing the learning rate to zero.
        if numerator != 0 and denominator != 0:
            alpha = numerator / denominator

    return alpha, theta
|
|
|
|
|
|
def main():
    """Fit a linear model on random examples and print the final theta.

    The parameters, the method used to update alpha and the kind of
    gradient are all hard-coded below.
    """
    # ############################## Parameters ###############################
    nbIterations = 100
    nbParameters = 1
    leadingCoefficients = [1, 2]
    nbExamples = 25
    intervalWidth = 0
    intervalValues = (-10, 10)
    ###########################################################################

    # ################################# Begin #################################
    coordinates = getRandomCoordinatesVectors(
        nbParameters, leadingCoefficients, nbExamples,
        intervalWidth, intervalValues)

    alpha = 0.1
    theta = np.array([49., 27.])
    thetaOld = np.array([0.1, 0.1])

    # Hard-coded for testing (change this if needed).
    flagMethod = 'B&B'
    flagGradient = 'avg'

    # One (cost, gradient, args) triple per update performed in an iteration.
    if flagGradient == 'avg':  # 'avg' as in 'Average'
        steps = [(costAverage, gradientAverage, [coordinates])]
    elif flagGradient == 'ebe':  # 'ebe' as in 'example by example'
        steps = [(cost, gradient, [x, y]) for x, y in coordinates]
    else:
        steps = []

    for _ in range(nbIterations):
        for usedCost, usedGradient, args in steps:
            alpha, theta = update(flagMethod, usedCost, usedGradient,
                                  args, theta, thetaOld, alpha)
            thetaOld = theta

        # With a null learning rate theta cannot move any more.
        if alpha == 0.:
            break

    print(theta)
    # ################################## End ##################################
|
|
|
|
|
|
# Run the experiment only when this file is executed as a script.
if __name__ == '__main__':
    main()
|