#!/usr/bin/env python3

"""
@authors: Yann & Sam'
"""

import random

import matplotlib.pyplot as plt
import numpy as np

from computeIterations import computeIteration, computeIterationAvg


###############################################################################

# This function generates 'nbPoints' coordinate tuples drowned in uniform
# noise of amplitude 'intervalWidth', scattered around the straight line
# y = leadingCoefficient[1] * x + leadingCoefficient[0].
# These tuples are stored in a list.
def getRandomCoordinates(leadingCoefficient, nbPoints, intervalWidth):

    coordinates = []

    for i in range(nbPoints):

        x, y = i, (i * leadingCoefficient[1]) + leadingCoefficient[0] + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))

        coordinates.append((x, y))

    return coordinates
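
# With the noise amplitude set to 0 the call becomes deterministic; e.g.
# getRandomCoordinates([1, 2], 3, 0) returns [(0, 1.0), (1, 3.0), (2, 5.0)],
# i.e. three points lying exactly on the line y = 2 * x + 1.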


# Adds the points, represented by their coordinates, to the next display
def addCoordinates(coordinates):

    # The list of tuples becomes a list of two lists, as in:
    # [[x0, ..., xn], [y0, ..., yn]]
    L = list(map(list, zip(*coordinates)))

    plt.plot(L[0], L[1], 'r')
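
# For instance, zip(*[(0, 1.0), (1, 3.0), (2, 5.0)]) regroups the tuples as
# [[0, 1, 2], [1.0, 3.0, 5.0]], the (xs, ys) form that plt.plot expects.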


# Sets up the window for the next display
def setDisplay(leadingCoefficient, nbPoints, intervalWidth, displayTitle):

    # Creates the window used next
    plt.figure('Affine Regression')

    # Some display parameters
    plt.title(displayTitle)
    plt.xlabel('Input')
    plt.ylabel('Output')
    plt.grid(True)
    plt.axis([0,
              nbPoints,
              -abs(intervalWidth) + leadingCoefficient[0],
              (nbPoints * leadingCoefficient[1]) +
              leadingCoefficient[0] + abs(intervalWidth)])


# Adds the computed affine regression line, with its legend entry, to the
# display
def displayWindow(nbPoints, theta, color):

    # "time" vector: one abscissa per generated point
    t = np.arange(0., nbPoints)

    # The affine regression line is added here
    plt.plot(t, (t * theta[1]) + theta[0], color=color,
             label="y = {} * x + {}".format(round(float(theta[1]), 3),
                                            round(float(theta[0]), 3)))

    # Places the legend in the upper left corner
    plt.legend(loc="upper left", frameon=True)
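
# For example, theta = [0.987, 2.014] produces the legend entry
# "y = 2.014 * x + 0.987".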


# Uses the quick, closed-form normal equation method to compute those
# parameters, gathered in 'theta'
def normalEquation(X, Y):

    # Formula: theta = (tX * X)^-1 * tX * Y,
    # where 'X' stacks the inputs in columns,
    # 'tX' is the transpose of 'X',
    # and 'Y' stacks the outputs in a column too

    tX = np.transpose(X)

    thetaNE = np.dot(np.dot(np.linalg.inv(np.dot(tX, X)), tX), Y)

    return thetaNE
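
# Sanity check on noise-free data: with X = [[1, 0], [1, 1], [1, 2]] and
# Y = [1, 3, 5], the formula returns thetaNE = [1., 2.], recovering the line
# y = 2 * x + 1 exactly. np.linalg.lstsq would be numerically safer if
# tX * X were ill-conditioned, but the explicit inverse is fine at this scale.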


# Here, we build the 'X' and 'Y' vectors, which respectively contain the
# inputs and the outputs in columns
def getColumnsVectors(coordinates):

    L = list(map(list, zip(*coordinates)))
    X = []

    for i in range(len(L[0])):

        X.append([1, L[0][i]])

    Y = L[1]

    return X, Y
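
# For example, getColumnsVectors([(0, 1.0), (1, 3.0)]) returns
# ([[1, 0], [1, 1]], [1.0, 3.0]): each input is prefixed with a constant 1 so
# that theta[0] plays the role of the intercept.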


def main():

    # ############################## Parameters ###############################

    nbIterations = 100
    nbIterationsAvg = 1000

    alpha = 0.001
    alphaAvg = 0.001

    # The constant term and the slope of the affine line, here:
    # y = 2 * x + 1
    leadingCoefficient = [1, 2]

    nbPoints = 25
    intervalWidth = 3

    # ######################### Computing iterations #########################

    # Creates the function that performs one iteration of the gradient descent
    training, theta = computeIteration(alpha, 2)

    # Same thing with an averaged gradient descent
    trainingAvg, thetaAvg = computeIterationAvg(alphaAvg, 2, nbPoints)
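
    # As used below, 'training' updates theta after each single
    # (input, output) pair, while 'trainingAvg' updates it once per call from
    # the gradient averaged over all 'nbPoints' examples.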

    # ################################# Begin #################################

    setDisplay(leadingCoefficient, nbPoints, intervalWidth,
               "Affine regression with gradient descent "
               "(blue: example by example, red: averaged) "
               "and without it (green: normal equation)")

    coordinates = getRandomCoordinates(leadingCoefficient,
                                       nbPoints,
                                       intervalWidth)
    addCoordinates(coordinates)

    # Linear regression with gradient descent, example by example

    for i in range(nbIterations):

        for x, y in coordinates:

            training([1, x], y)

    #################################

    inputs, outputs = zip(*coordinates)

    inputs = [(1, x) for x in inputs]
    outputs = [list(outputs)]

    for i in range(nbIterationsAvg):

        trainingAvg(inputs, outputs)

    #################################

    # Linear regression without gradient descent: the normal equation

    X, Y = getColumnsVectors(coordinates)
    thetaNE = normalEquation(X, Y)

    ####################################

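    # 'theta' and 'thetaAvg' expose .get_value() (presumably Theano-style
    # shared variables coming from computeIterations), whereas 'thetaNE' is
    # already a plain NumPy array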
    displayWindow(nbPoints, theta.get_value(), 'blue')
    displayWindow(nbPoints, thetaAvg.get_value(), 'red')
    displayWindow(nbPoints, thetaNE, 'green')

    plt.show(block=True)

    # ################################## End ##################################


if __name__ == '__main__':
    main()