#!/usr/bin/env python3

"""
@authors: Yann & Sam'
"""

import random

import matplotlib.pyplot as plt
import numpy as np
"""
|
|
Simple linear regression with Cauchy method computed into two ways:
|
|
|
|
- "Point by point": For each example we try to adjust the parameters
|
|
- "With average": The parameters are computed based on the average of
|
|
the values of the cost function
|
|
"""


###############################################################################


# Squared-error cost of the prediction for a single training example
def cost(x, y, theta):

    return (np.dot(x, theta) - y) ** 2
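
# For instance, with x = [1, 3], theta = [2, 4] and y = 14, the prediction
# np.dot(x, theta) equals 1*2 + 3*4 = 14, so cost(x, y, theta) == 0.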


# Returns the average of the cost function over all the training examples
def costAverage(coordinates, theta):

    average = 0.

    for x, y in coordinates:

        average += (np.dot(x, theta) - y)**2

    return average / (2. * len(coordinates))


# Gradient of the cost function for a single training example
def gradient(x, y, theta):

    return np.dot((np.dot(x, theta) - y), x)


# Returns an "averaged" gradient computed over all the training examples
def gradientAverage(coordinates, theta):

    average = 0

    for x, y in coordinates:

        average += np.dot((np.dot(x, theta) - y), x)

    return average / len(coordinates)
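
# Note that 'gradientAverage()' is exactly the gradient of 'costAverage()':
#
#     d/dtheta [ (1/2m) * sum((x . theta - y)**2) ] = (1/m) * sum((x . theta - y) * x)
#
# which is why the two are used together in 'gradientDescentAverage()'.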


# Generates random coordinates which we'll use as training examples
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients, nbExamples,
                                intervalWidth):

    coordinates = []

    for i in range(nbExamples):

        # X[0] = 1 is the constant term multiplying the intercept coefficient
        X = [1]

        for j in range(nbParameters):

            X.append(random.uniform(-abs(nbExamples), abs(nbExamples)))

        # The target is the exact linear model plus a uniform noise of
        # half-width 'intervalWidth'
        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))

        coordinates.append((X, y))

    return coordinates
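
# For example, with nbParameters = 1, leadingCoefficients = [2, 4] and
# intervalWidth = 0 (the values used in 'main()'), each returned pair looks
# like ([1, x1], 2 + 4 * x1) for some random x1.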


# Returns the learning rate which led to the lowest-cost point reached during
# the descent
def bestLearningRate(x, y, THETA, T):

    L = []

    for theta in THETA:

        L.append(cost(x, y, theta))

    # 'THETA[i]' was produced with learning rate 'T[i]', so we just have to
    # return the learning rate present at the index of the minimum of 'L' !
    return T[L.index(min(L))]


# Same function as above but with the 'costAverage()' function ;-)
def bestLearningRateAverage(coordinates, THETA, T):

    L = []

    for theta in THETA:

        L.append(costAverage(coordinates, theta))

    return T[L.index(min(L))]


# Runs the "point by point" descent: for every training example, it tries many
# different learning rates and keeps the best one using the
# 'bestLearningRate()' function
def gradientDescent(coordinates, theta_start, T, nbIterations):

    # Will contain each point we'll effectively pass through during the descent
    P = []

    # We add the startup point as beginning of the descent
    P.append(theta_start)

    # We repeat the descent for a fixed number of passes over the whole
    # training set
    for i in range(nbIterations):

        # Here we get the last point where we stopped the descent
        theta_pred = P[-1]

        for x, y in coordinates:

            # Will contain the coordinates of each point we'll pass through
            # during the learning rates test
            THETA = []

            # We compute the gradient at this point
            grad = gradient(x, y, theta_pred)

            # In this loop, we'll try many descents with different learning
            # rates, and we'll keep the best one at the end
            for t in T:

                # Gradient descent !
                theta_next = theta_pred - t * grad

                # We add the new candidate parameters to the list of all of them
                THETA.append(theta_next)

            # Let's get the learning rate which gave us the best minimum
            # reached during the descent
            min_t = bestLearningRate(x, y, THETA, T)

            # Let's add the best minimum point reached to the "path"
            # of the descent
            P.append(theta_pred - min_t * grad)

            # print(P[-1])

    return P
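
# Note: within one iteration every example's candidate update is computed from
# the same starting point 'theta_pred', and each of them is appended to 'P',
# so the returned path contains 1 + nbIterations * len(coordinates) points.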


# Works the same way as 'gradientDescent()' but uses 'costAverage()',
# 'gradientAverage()' and 'bestLearningRateAverage()' instead of,
# respectively, 'cost()', 'gradient()' and 'bestLearningRate()'
def gradientDescentAverage(coordinates, theta_start, T, nbIterations):

    P = []

    P.append(theta_start)

    for i in range(nbIterations):

        THETA = []

        theta_pred = P[-1]

        grad = gradientAverage(coordinates, theta_pred)

        for t in T:

            theta_next = theta_pred - t * grad

            THETA.append(theta_next)

        min_t = bestLearningRateAverage(coordinates, THETA, T)

        P.append(theta_pred - min_t * grad)

        # print(P[-1])

    return P


def main():

    # ############################## Parameters ##############################

    nbIterations = 10
    nbIterationsAverage = 30
    nbParameters = 1
    leadingCoefficients = [2, 4]
    nbExamples = 25
    intervalWidth = 0

    # Startup descent point
    theta_start = [0., 0.]

    # Which learning rates we'll test during the descent
    minLR = 0.0
    maxLR = 1.0
    incrementLR = 0.001

    # Will contain each learning rate we'll test
    T = np.arange(minLR, maxLR, incrementLR)
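
    # With these values 'T' holds the candidate rates 0.000, 0.001, ..., 0.999
    # (np.arange excludes the stop value). Since t = 0 is among them, the
    # selected update can never make the compared cost worse than staying put.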

    ###########################################################################

    # ################################# Begin #################################

    coordinates = getRandomCoordinatesVectors(nbParameters,
                                              leadingCoefficients,
                                              nbExamples,
                                              intervalWidth)

    # The effective descent path
    path = gradientDescent(coordinates, theta_start, T, nbIterations)

    # From here, let's compute an "averaged" gradient descent starting from
    # the latest parameter values added!
    path.extend(
        gradientDescentAverage(coordinates, path[-1], T, nbIterationsAverage)
    )

    # Now we keep going with a "simple" gradient descent from the latest
    # parameters added by the line just above.
    path.extend(gradientDescent(coordinates, path[-1], T, nbIterations))

    plt.figure("Affine Regression with Cauchy method")

    plt.plot(np.arange(len(path)), path)
    plt.show(block=True)
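
    # 'path' is a sequence of parameter vectors, so the plot draws one curve
    # per parameter (theta_0 and theta_1 here) against the step index; with
    # intervalWidth = 0 both curves should approach leadingCoefficients = [2, 4].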

    # ################################## End ##################################


if __name__ == '__main__':
    main()