#!/usr/bin/env python3

"""
@authors: Yann & Sam'
"""

import random

import matplotlib.pyplot as plt
import numpy as np
"""
|
|
Simple linear regression with Cauchy method computed into two ways:
|
|
|
|
- "Point by point": For each example we try to adjust the parameters
|
|
- "With average": The parameters are computed based on the average of
|
|
the values of the cost function
|
|
"""


###############################################################################


# Squared-error cost of the prediction for a single training example
def cost(x, y, theta):

    return (np.dot(x, theta) - y) ** 2
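
# For instance, with x = [1, 3], theta = [2, 4] and y = 14, the prediction
# np.dot(x, theta) equals 1*2 + 3*4 = 14, so cost(x, y, theta) == 0.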


# Returns the average of the cost function over all the training examples
def costAverage(coordinates, theta):

    average = 0.

    for x, y in coordinates:

        average += (np.dot(x, theta) - y)**2

    return average / (2. * len(coordinates))


# Gradient of the cost function for a single training example
def gradient(x, y, theta):

    return np.dot((np.dot(x, theta) - y), x)


# Returns an "averaged" gradient computed over all the training examples
def gradientAverage(coordinates, theta):

    average = 0

    for x, y in coordinates:

        average += np.dot((np.dot(x, theta) - y), x)

    return average / len(coordinates)
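
# Note that 'gradientAverage()' is exactly the gradient of 'costAverage()':
#
#     d/dtheta [ (1/2m) * sum((x . theta - y)**2) ] = (1/m) * sum((x . theta - y) * x)
#
# which is why the two are used together in 'gradientDescentAverage()'.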


# Generates random coordinates which we'll use as training examples
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients, nbExamples,
                                intervalWidth):

    coordinates = []

    for i in range(nbExamples):

        # X[0] = 1 is the constant term multiplying the intercept coefficient
        X = [1]

        for j in range(nbParameters):

            X.append(random.uniform(-abs(nbExamples), abs(nbExamples)))

        # The target is the exact linear model plus a uniform noise of
        # half-width 'intervalWidth'
        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))

        coordinates.append((X, y))

    return coordinates
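
# For example, with nbParameters = 1, leadingCoefficients = [2, 4] and
# intervalWidth = 0 (the values used in 'main()'), each returned pair looks
# like ([1, x1], 2 + 4 * x1) for some random x1.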


# Returns the learning rate which led to the lowest-cost point reached during
# the descent
def bestLearningRate(x, y, THETA, T):

    L = []

    for theta in THETA:

        L.append(cost(x, y, theta))

    # 'THETA[i]' was produced with learning rate 'T[i]', so we just have to
    # return the learning rate present at the index of the minimum of 'L' !
    return T[L.index(min(L))]


# Same function as above but with the 'costAverage()' function ;-)
def bestLearningRateAverage(coordinates, THETA, T):

    L = []

    for theta in THETA:

        L.append(costAverage(coordinates, theta))

    return T[L.index(min(L))]


# Runs the "point by point" descent: for every training example, it tries many
# different learning rates and keeps the best one using the
# 'bestLearningRate()' function
def gradientDescent(coordinates, theta_start, T, nbIterations):

    # Will contain each point we'll effectively pass through during the descent
    P = []

    # We add the startup point as beginning of the descent
    P.append(theta_start)

    # We repeat the descent for a fixed number of passes over the whole
    # training set
    for i in range(nbIterations):

        # Here we get the last point where we stopped the descent
        theta_pred = P[-1]

        for x, y in coordinates:

            # Will contain the coordinates of each point we'll pass through
            # during the learning rates test
            THETA = []

            # We compute the gradient at this point
            grad = gradient(x, y, theta_pred)

            # In this loop, we'll try many descents with different learning
            # rates, and we'll keep the best one at the end
            for t in T:

                # Gradient descent !
                theta_next = theta_pred - t * grad

                # We add the new candidate parameters to the list of all of them
                THETA.append(theta_next)

            # Let's get the learning rate which gave us the best minimum
            # reached during the descent
            min_t = bestLearningRate(x, y, THETA, T)

            # Let's add the best minimum point reached to the "path"
            # of the descent
            P.append(theta_pred - min_t * grad)

            # print(P[-1])

    return P
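
# Note: within one iteration every example's candidate update is computed from
# the same starting point 'theta_pred', and each of them is appended to 'P',
# so the returned path contains 1 + nbIterations * len(coordinates) points.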


# Works the same way as 'gradientDescent()' but uses 'costAverage()',
# 'gradientAverage()' and 'bestLearningRateAverage()' instead of,
# respectively, 'cost()', 'gradient()' and 'bestLearningRate()'
def gradientDescentAverage(coordinates, theta_start, T, nbIterations):

    P = []

    P.append(theta_start)

    for i in range(nbIterations):

        THETA = []

        theta_pred = P[-1]

        grad = gradientAverage(coordinates, theta_pred)

        for t in T:

            theta_next = theta_pred - t * grad

            THETA.append(theta_next)

        min_t = bestLearningRateAverage(coordinates, THETA, T)

        P.append(theta_pred - min_t * grad)

        # print(P[-1])

    return P


def main():

    # ############################## Parameters ##############################

    nbIterations = 10
    nbIterationsAverage = 30
    nbParameters = 1
    leadingCoefficients = [2, 4]
    nbExamples = 25
    intervalWidth = 0

    # Startup descent point
    theta_start = [0., 0.]

    # Which learning rates we'll test during the descent
    minLR = 0.0
    maxLR = 1.0
    incrementLR = 0.001

    # Will contain each learning rate we'll test
    T = np.arange(minLR, maxLR, incrementLR)
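
    # With these values 'T' holds the candidate rates 0.000, 0.001, ..., 0.999
    # (np.arange excludes the stop value). Since t = 0 is among them, the
    # selected update can never make the compared cost worse than staying put.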

    ###########################################################################

    # ################################# Begin #################################

    coordinates = getRandomCoordinatesVectors(nbParameters,
                                              leadingCoefficients,
                                              nbExamples,
                                              intervalWidth)

    # The effective descent path
    path = gradientDescent(coordinates, theta_start, T, nbIterations)

    # From here, let's compute an "averaged" gradient descent starting from
    # the latest parameter values added!
    path.extend(
        gradientDescentAverage(coordinates, path[-1], T, nbIterationsAverage)
    )

    # Now we keep going with a "simple" gradient descent from the latest
    # parameters added by the line just above.
    path.extend(gradientDescent(coordinates, path[-1], T, nbIterations))

    plt.figure("Affine Regression with Cauchy method")

    plt.plot(np.arange(len(path)), path)
    plt.show(block=True)
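
    # 'path' is a sequence of parameter vectors, so the plot draws one curve
    # per parameter (theta_0 and theta_1 here) against the step index; with
    # intervalWidth = 0 both curves should approach leadingCoefficients = [2, 4].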

    # ################################## End ##################################


if __name__ == '__main__':
    main()