This repository has been archived on 2023-11-03. You can view files and clone it, but cannot push or open issues or pull requests.
MINDLE/dev_null/Theano/learningRateDiscovering.py

176 lines
4.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
@authors: Yann & Sam'
"""
import random
import matplotlib.pyplot as plt
import numpy as np
import theano
###############################################################################
# Builds 'nbPoints' (x, y) sample points lying on the line y = x * slope,
# each y perturbed by uniform noise drawn from [-|intervalWidth|, +|intervalWidth|].
# Returns them as a list of (x, y) tuples.
def getRandomCoordinates(leadingCoeficient, nbPoints, intervalWidth):
    half_band = abs(intervalWidth)
    # One uniform draw per point, in index order (same draw sequence as a
    # plain loop, so seeded runs are reproducible).
    return [(i, i * leadingCoeficient + random.uniform(-half_band, half_band))
            for i in range(nbPoints)]
# Plots the given (x, y) points (red) on the current figure for later display.
def addCoordonates(coordinates):
    # Split the list of (x, y) tuples into parallel x and y lists:
    # [x0, ..., xn] and [y0, ..., yn].
    xs = [point[0] for point in coordinates]
    ys = [point[1] for point in coordinates]
    plt.plot(xs, ys, 'r')
# Prepares the plotting window (title, axis labels, grid, axis limits)
# before any curve is drawn.
def setDisplay(leadingCoeficient, nbPoints, intervalWidth, displayTitle):
    # Open (or select) the window used for the whole demo.
    plt.figure('Linear Regression')
    # Cosmetic parameters.
    plt.title(displayTitle)
    plt.xlabel('Input')
    plt.ylabel('Output')
    plt.grid(True)
    # Axis limits sized so the noisy line fits entirely in view.
    noise_margin = abs(intervalWidth)
    plt.axis([0, nbPoints,
              -noise_margin,
              nbPoints * leadingCoeficient + noise_margin])
# Draws the fitted line y = theta * x over [0, nbPoints) in the given color,
# with its equation in the legend.
def displayWindow(nbPoints, theta, myColor):
    # Sample abscissas 0, 1, ..., nbPoints - 1.
    t = np.arange(0., nbPoints)
    rounded_slope = round(float(theta), 3)
    # The regression line itself, labelled with its (rounded) slope.
    plt.plot(t, t * theta, color=myColor,
             label="y = {} * x".format(rounded_slope))
    # Legend pinned to the upper-left corner.
    plt.legend(loc="upper left", frameon=True)
def main():
    """Fit y = theta * x to noisy synthetic data with Theano gradient descent
    using the Barzilai & Borwein adaptive step size, then compare against a
    gradient-free slope estimate (mean of y/x) on a matplotlib plot."""
    # ########################### THEANO BLACK BOX ###########################
    # Symbolic scalar input/target.
    X = theano.tensor.scalar()
    Y = theano.tensor.scalar()
    # Model parameter (slope), initialised at 1.0.
    theta = theano.shared(np.asarray(1.0))
    y = theta * X  # <-- Model: a line through the origin
    # Squared-error cost for one sample.
    Jtheta = theano.tensor.mean(theano.tensor.sqr(y - Y))
    # dJ/dtheta, computed symbolically.
    gradient = theano.tensor.grad(cost=Jtheta, wrt=theta)
    # Learning rate (initial value only; replaced from the first update on).
    alpha = theano.shared(np.asarray(0.005))
    # Previous-step parameter and gradient, needed by Barzilai & Borwein.
    thetaOld = theano.shared(np.asarray(0.0))
    gradientOld = theano.shared(np.asarray(0.0))
    """
    Following updates are made to run the Barzilai & Borwein's adaptive
    step size (for the learning rate) algorithm.
    """
    # All four updates are evaluated from the PRE-update values of the shared
    # variables within a single theano.function call, so their listing order
    # below does not change the result.
    updateTheta = [theta, theta - alpha * gradient]
    # NOTE(review): if gradient == gradientOld on some later iteration, this
    # divides by zero — only the first-iteration case (x == 0) is guarded
    # below. TODO confirm this cannot occur with the generated data.
    updateAlpha = [alpha, (theta - thetaOld) / (gradient - gradientOld)]
    updateThetaOld = [thetaOld, theta]
    updateGradientOld = [gradientOld, gradient]
    # One compiled step: takes (x, y), returns the cost, applies all updates.
    training = theano.function(inputs=[X, Y],
                               outputs=Jtheta,
                               updates=[updateTheta,
                                        updateAlpha,
                                        updateThetaOld,
                                        updateGradientOld],
                               allow_input_downcast=True)
    ###########################################################################
    # Experiment parameters: epochs, true slope, sample count, noise width.
    nbIterations = 10
    leadingCoeficient = 2
    nbPoints = 25
    intervalWidth = 3
    setDisplay(leadingCoeficient, nbPoints, intervalWidth,
               "Getting close by linear regression with (blue) and without"
               " (green) gradient")
    coordinates = getRandomCoordinates(leadingCoeficient,
                                       nbPoints,
                                       intervalWidth)
    addCoordonates(coordinates)
    """
    If the input is equal to zero, then the gradient is equal to zero
    (in this situation) which will cause a division by zero on the first
    iteration. So if the input of the first coordinate happens to be zero,
    you just have to switch the coordinate with a non-zero input coordinate
    to avoid the problem.
    """
    if coordinates[0][0] == 0:
        """
        Here you consider that there aren't two coordinates with the same
        input. So if the input of the first coordinate is zero, you are
        guaranteed that the input of the 2nd coordinate isn't zero.
        """
        coordinates[0], coordinates[1] = coordinates[1], coordinates[0]
    # Training loop: one gradient/alpha update per sample per epoch.
    for i in range(nbIterations):
        for x, y in coordinates:
            training(x, y)
    # ################## Linear Regression without gradient ###################
    # Baseline slope estimate: average of y/x over all samples with x != 0.
    values = []
    for x, y in coordinates:
        if x != 0:
            values.append(y / x)
    average = sum(values) / len(values)
    ###########################################################################
    # Green: gradient-free estimate; blue: Theano-trained slope.
    displayWindow(nbPoints, average, 'green')
    displayWindow(nbPoints, theta.get_value(), 'blue')
    plt.show(block=True)


if __name__ == '__main__':
    main()