MINDLE/dev_null/Museum/linearRegressionCauchy.py

#!/usr/bin/env python3
"""
@authors: Yann & Sam'
"""
import random
import matplotlib.pyplot as plt
import numpy as np
"""
Simple linear regression with Cauchy method computed into two ways:
- "Point by point": For each example we try to adjust the parameters
- "With average": The parameters are computed based on the average of
the values of the cost function
"""
###############################################################################
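
# Added note on the method: what the script calls the "Cauchy method" is
# steepest descent with a crude line search. Every update below has the form
#     theta_next = theta_pred - t * grad
# where the step size t is not fixed in advance but picked from the grid of
# candidate learning rates T, keeping the candidate that yields the lowest
# value of the cost function.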


# Simple cost function
def cost(x, y, theta):
    return (np.dot(x, theta) - y) * (np.dot(x, theta) - y)


# Returns the average of the cost function values over all training examples
def costAverage(coordinates, theta):
    average = 0.
    for x, y in coordinates:
        average += (np.dot(x, theta) - y)**2
    return average / (2. * len(coordinates))


# Simple gradient function
def gradient(x, y, theta):
    return np.dot((np.dot(x, theta) - y), x)


# Returns an "averaged" gradient computed from all training examples
def gradientAverage(coordinates, theta):
    average = 0.
    for x, y in coordinates:
        average += np.dot((np.dot(x, theta) - y), x)
    return average / len(coordinates)
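

# Added illustration (not part of the original script): 'costAverage()' is the
# half mean squared error
#     J(theta) = 1/(2m) * sum_i (x_i . theta - y_i)^2
# and 'gradientAverage()' is its gradient
#     grad J(theta) = 1/m * sum_i (x_i . theta - y_i) * x_i .
# The hypothetical helper below sketches a finite-difference check of that
# relationship; it relies only on numpy, which is already imported above.
def _finiteDifferenceCheck(coordinates, theta, epsilon=1e-6):
    theta = np.asarray(theta, dtype=float)
    numerical = np.zeros_like(theta)
    for k in range(len(theta)):
        step = np.zeros_like(theta)
        step[k] = epsilon
        # Central difference approximation of the k-th partial derivative
        numerical[k] = (costAverage(coordinates, theta + step)
                        - costAverage(coordinates, theta - step)) / (2 * epsilon)
    # Should be close to zero if the analytical gradient is consistent
    return np.max(np.abs(numerical - gradientAverage(coordinates, theta)))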


# Generate random coordinates which we'll use as training examples
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients, nbExamples,
                                intervalWidth):
    coordinates = []
    for i in range(nbExamples):
        X = [1]
        for j in range(nbParameters):
            X.append(random.uniform(-abs(nbExamples), abs(nbExamples)))
        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))
        coordinates.append((X, y))
    return coordinates
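

# Added commentary: each generated entry is a pair (X, y) where
# X = [1, x_1, ..., x_nbParameters] (the leading 1 carries the intercept) and
# y = X . leadingCoefficients + noise drawn from [-intervalWidth, intervalWidth].
# With the defaults used in main() (leadingCoefficients = [2, 4],
# intervalWidth = 0), every example satisfies y = 2 + 4 * x_1 exactly.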


# Returns the learning rate which produced the best minimum reached during
# the descent
def bestLearningRate(x, y, THETA, T):
    L = []
    for theta in THETA:
        L.append(cost(x, y, theta))
    # We just have to return the learning rate present at the index of the
    # minimum of 'L' !
    return T[L.index(min(L))]


# Same function as above, but with the 'costAverage()' function ;-)
def bestLearningRateAverage(coordinates, THETA, T):
    L = []
    for theta in THETA:
        L.append(costAverage(coordinates, theta))
    return T[L.index(min(L))]
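

# Added note: both selection helpers above are simply an argmin over the grid
# of candidate learning rates; a (hypothetical) equivalent numpy formulation
# would be 'return T[int(np.argmin(L))]'.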


# This function tries many different values of learning rate in order to find
# the best one using the 'bestLearningRate()' function
def gradientDescent(coordinates, theta_start, T, nbIterations):
    # Will contain each point we'll effectively pass through during the descent
    P = []
    # We add the startup point as the beginning of the descent
    P.append(theta_start)
    # Main loop: we run a fixed number of iterations, computing one update
    # per training example on every pass
    for i in range(nbIterations):
        # Here we get the last point where we stopped the descent
        theta_pred = P[-1]
        for x, y in coordinates:
            # Will contain the coordinates of each point we'll pass through
            # during the learning rates test
            THETA = []
            # We compute the gradient at this point
            grad = gradient(x, y, theta_pred)
            # In this loop, we'll try many descents with different learning
            # rates, and we'll keep the best one at the end
            for t in T:
                # Gradient descent !
                theta_next = theta_pred - t * grad
                # We add the new coordinates to the list with all of them
                THETA.append(theta_next)
            # Let's get the learning rate which gave us the best minimum
            # reached during the descent
            min_t = bestLearningRate(x, y, THETA, T)
            # Let's add the best minimum point reached to the "path"
            # of the descent
            P.append(theta_pred - min_t * grad)
            # print(P[-1])
    return P
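

# Added note: within one outer iteration every training example appends its
# own update of 'theta_pred' to the path, so the returned list holds
# 1 + nbIterations * len(coordinates) points; the next iteration then restarts
# from the last point appended.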


# Works the same way as gradientDescent(), but uses 'costAverage()',
# 'gradientAverage()' and 'bestLearningRateAverage()' instead of 'cost()',
# 'gradient()' and 'bestLearningRate()' respectively
def gradientDescentAverage(coordinates, theta_start, T, nbIterations):
    P = []
    P.append(theta_start)
    for i in range(nbIterations):
        THETA = []
        theta_pred = P[-1]
        grad = gradientAverage(coordinates, theta_pred)
        for t in T:
            theta_next = theta_pred - t * grad
            THETA.append(theta_next)
        min_t = bestLearningRateAverage(coordinates, THETA, T)
        P.append(theta_pred - min_t * grad)
        # print(P[-1])
    return P
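

# Added illustration (not part of the original script): since this is ordinary
# linear least squares, the minimiser of 'costAverage()' can also be obtained
# in closed form. The hypothetical helper below sketches that reference
# solution with numpy, which can be compared against the tail of the descent
# path returned by the functions above.
def _closedFormTheta(coordinates):
    X = np.array([x for x, _ in coordinates], dtype=float)
    y = np.array([target for _, target in coordinates], dtype=float)
    # Least-squares solution of X . theta = y, computed by numpy
    theta, *_ = np.linalg.lstsq(X, y, rcond=None)
    return theta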


def main():
    # ############################## Parameters ##############################
    nbIterations = 10
    nbIterationsAverage = 30
    nbParameters = 1
    leadingCoefficients = [2, 4]
    nbExamples = 25
    intervalWidth = 0

    # Startup descent point
    theta_start = [0., 0.]

    # Which learning rates we'll test during the descent
    minLR = 0.0
    maxLR = 1.0
    incrementLR = 0.001

    # Will contain each learning rate we'll test (note that 'minLR = 0.0'
    # keeps a zero step among the candidates, so a descent step can leave the
    # parameters unchanged when every tested step increases the cost)
    T = np.arange(minLR, maxLR, incrementLR)
    ###########################################################################

    # ################################# Begin #################################
    coordinates = getRandomCoordinatesVectors(nbParameters,
                                              leadingCoefficients,
                                              nbExamples,
                                              intervalWidth)

    # The effective descent path
    path = gradientDescent(coordinates, theta_start, T, nbIterations)

    # From here, let's run an "averaged" gradient descent starting from the
    # latest parameter values added!
    path.extend(
        gradientDescentAverage(coordinates, path[-1], T, nbIterationsAverage)
    )

    # Now we keep going with a "simple" gradient descent from the latest
    # parameters added by the line just above
    path.extend(gradientDescent(coordinates, path[-1], T, nbIterations))

    # Plot the evolution of both parameters along the descent path
    plt.figure("Affine Regression with Cauchy method")
    plt.plot(np.arange(len(path)), path)
    plt.show(block=True)
    # ################################## End ##################################


if __name__ == '__main__':
    main()
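
# Usage note (added): running 'python3 linearRegressionCauchy.py' generates a
# noise-free dataset (intervalWidth = 0), runs the three descent phases above
# and opens a matplotlib window plotting both parameters against the step
# index; with the default settings they should converge towards
# leadingCoefficients = [2, 4].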