#!/usr/bin/env python3
"""
@authors: Yann & Sam'
"""
import random
import numpy as np
import theano
import theano.tensor as T
"""
The goal of this program is to make a linear regression for any number of
parameters in the input.
"""
"""
Due to our moving from Theano to TensorFlow, we've stopped the development of
this program.
Theano didn't give us the possibility of get the gradient's value anytime
during the training.
"""
###############################################################################
"""
This function generates 'nbExamples' tuples of coordinates.
The first member being the values of the input and the second member being the
output corresponding to the input, to which is added some noise of variance
'intervalWidth'. The output is linear in each parameters of the input.
'leadingCoefficients' contains all the slopes.
"""
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients,
                                nbExamples, intervalWidth):
    # 'coordinates' will be created in this way:
    # [([x0, ..., xn], y0), ..., ([x0, ..., xn], ym)]
    # (with 'n' the number of parameters, and 'm' the number of examples)
    coordinates = []
    for i in range(nbExamples):
        X = [1]
        for j in range(nbParameters):
            # Creates the input parameters with random values for
            # each dimension.
            X.append(random.uniform(-abs(nbExamples), abs(nbExamples)))
        # Creates the output corresponding to the input.
        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))
        # --> 'np.dot()': given two 1-D vectors, returns the inner product of
        #     the two vectors.
        coordinates.append((X, y))

    return coordinates
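
# A minimal usage sketch (illustration only, not part of the original script):
# with leadingCoefficients = [1, 2, 3] (so nbParameters = 2) and
# intervalWidth = 0, each generated example is ([1, x1, x2], y) with
# y = 1 + 2 * x1 + 3 * x2 exactly.
#
#     sample = getRandomCoordinatesVectors(2, [1, 2, 3], 1, 0)
#     # sample == [([1, x1, x2], 1 + 2 * x1 + 3 * x2)]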


def main():
    # ############################## Parameters ##############################
    nbIterations = 1000
    leadingCoefficients = [i for i in range(1, 4)]
    nbParameters = len(leadingCoefficients) - 1
    nbExamples = 75
    intervalWidth = 0
    ###########################################################################

    coordinates = getRandomCoordinatesVectors(nbParameters,
                                              leadingCoefficients,
                                              nbExamples,
                                              intervalWidth)
    x, y = zip(*coordinates)
    x = list(x)
    y = list(y)
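
    # After the 'zip', 'x' is a list of nbExamples input vectors (each of
    # length nbParameters + 1, because of the leading 1 used as a bias term)
    # and 'y' is the list of the nbExamples corresponding outputs.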
    # ########################### THEANO BLACK BOX ############################
    X = T.matrix()
    Y = T.row()

    # First value of 'alpha'. Actually, whatever it is, this algorithm should
    # always converge.
    alpha = theano.shared(np.asarray(1.))
    # At the beginning, all parameters are equal to '0'.
    theta = theano.shared(np.zeros(len(leadingCoefficients)))
    # Only the first gradient needs to be computed manually
    # (its value is used for (and only for) the first iteration).
    gradientOld = theano.shared(
        np.asarray(
            sum(
                [np.dot(-y[i], x[i]) for i in range(nbExamples)]
            ) / float(nbExamples)
        )
    )

    h = T.dot(X, theta)  # <-- Model
    diff = h - Y
    Jtheta = T.tensordot(diff, diff) / (2 * nbExamples)
    gradient = T.grad(cost=Jtheta, wrt=theta)
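
    # In matrix form (with m = nbExamples), the cost above is the usual
    # least-squares objective and 'T.grad' builds its symbolic gradient:
    #     J(theta)      = ||X.theta - y||^2 / (2 * m)
    #     grad J(theta) = X^T (X.theta - y) / m
    # Note that the initial value of 'gradientOld' above is exactly this
    # gradient evaluated at theta = 0, i.e. -X^T y / m.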
    # Here are the training updates; the one for 'alpha' is the
    # Barzilai & Borwein implementation.
    updateTheta = [theta, theta - alpha * gradient]
    updateAlpha = [alpha,
                   theano.tensor.dot(-alpha * gradientOld,
                                     gradient - gradientOld) /
                   theano.tensor.dot(gradient - gradientOld,
                                     gradient - gradientOld)]
    updateGradOld = [gradientOld, gradient]
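
    # The 'alpha' update is a Barzilai-Borwein step size: with
    # s = theta_new - theta_old = -alpha * gradientOld and
    # d = gradient - gradientOld, the new step is
    #     alpha_new = (s . d) / (d . d)
    # which is exactly the expression used in 'updateAlpha' above.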
    training = theano.function(inputs=[X, Y],
                               outputs=Jtheta,
                               updates=[updateTheta,
                                        updateAlpha,
                                        updateGradOld],
                               allow_input_downcast=True)
    ###########################################################################
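
    # Note (standard Theano semantics, not something specific to this script):
    # each call to 'training(x, [y])' evaluates Jtheta and the update
    # expressions with the current values of the shared variables, then applies
    # all three updates at once. So one call is one full-batch gradient step
    # together with its Barzilai-Borwein step-size refresh.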
    for i in range(nbIterations):
        # To avoid 'nan', there is a break condition for the training loop:
        # stop when the gradient becomes too small (it means that we're
        # reaching the "good" parameter values)...
        if np.linalg.norm(gradientOld.get_value()) < 1.e-4:
            break
        # 'y' is wrapped in a list for a dimensional purpose ('Y' is a row,
        # so it expects a 2-D input).
        training(x, [y])

    print(theta.get_value())
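    # With intervalWidth = 0 (no noise on the generated data), 'theta' should
    # converge to 'leadingCoefficients', i.e. roughly [1., 2., 3.] here.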


if __name__ == '__main__':
    main()