#!/usr/bin/env python3
"""
Theano implementation of linear regression trained by gradient descent:
a stochastic version (one example per update) and a batch version
(averaging over all the examples).

@authors: Yann
"""

import numpy as np
import theano
import theano.tensor as T


def computeIteration(alpha, nbParameters):
    """Build a Theano training function performing one gradient descent
    update per (input, output) example, with learning rate 'alpha'.

    Returns the compiled function and the shared parameter theta.
    """
    if nbParameters == 1:
        exInput = T.scalar()  # One example input
    else:
        exInput = T.vector()  # Same with a vector for multiple parameters

    exOutput = T.scalar()  # The corresponding output

    # The hypothesis on the leading coefficient
    theta = theano.shared(np.zeros(nbParameters))

    if nbParameters == 1:
        # The hypothesis on the output given the input 'exInput'
        h = theta * exInput
    else:
        h = T.dot(theta, exInput)

    # The cost function representing the difference between the hypothesis
    # and the real output.
    Jtheta = T.mean(T.sqr(h - exOutput))
    gradient = T.grad(cost=Jtheta, wrt=theta)  # Gradient of the cost function

    # The goal is to find the theta minimizing Jtheta. This theta will be
    # the closest to the 'leadingCoefficient'. To do so, we use the gradient
    # descent method. The next line is the update made on theta for each
    # iteration of the gradient descent.
    update = [theta, theta - alpha * gradient]

    # Creates the training function that makes all updates for a gradient
    # descent iteration given an input and the corresponding output.
    training = theano.function(inputs=[exInput, exOutput],
                               outputs=Jtheta,
                               updates=[update],
                               allow_input_downcast=True)

    return training, theta

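# A minimal usage sketch for computeIteration (illustrative only: the
# learning rate, the data, and the true coefficient 3.0 are assumptions,
# not part of this file):
#
#     training, theta = computeIteration(alpha=0.1, nbParameters=1)
#     for x in np.random.rand(1000):
#         cost = training(x, 3. * x)  # one stochastic update per example
#     print(theta.get_value())        # should be close to [3.0]
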
def computeIterationAvg(alpha, nbParameters, nbExamples):
    """Same as computeIteration, but each update averages the gradient
    over all 'nbExamples' examples (batch gradient descent).

    Returns the compiled function and the shared parameter thetaAvg.
    """
    if nbParameters == 1:
        exInputs = T.row()  # List of all the examples' inputs
    else:
        exInputs = T.matrix()  # Same, but a matrix for multiple parameters

    exOutputs = T.row()  # The corresponding list of all the examples' outputs

    if nbParameters == 1:
        # The hypothesis on the leading coefficient
        thetaAvg = theano.shared(np.asarray(0.))
    else:
        # Same but for a vectorized implementation
        thetaAvg = theano.shared(np.zeros(nbParameters))

    # The hypothesis on the list of outputs given the list of inputs.
    hAvg = T.dot(exInputs, thetaAvg)

    diff = hAvg - exOutputs

    # Cost function representing the average of all the differences
    # (one difference being a member of the list 'diff').
    JthetaAvg = T.tensordot(diff, diff) / (2. * nbExamples)

    # Gradient of the cost function
    gradientAvg = T.grad(cost=JthetaAvg, wrt=thetaAvg)

    # Same as for the previous implementation of the gradient descent.
    update = [thetaAvg, thetaAvg - alpha * gradientAvg]

    # Same as for the previous implementation of the gradient descent.
    trainingAvg = theano.function(inputs=[exInputs, exOutputs],
                                  outputs=JthetaAvg,
                                  updates=[update],
                                  allow_input_downcast=True)

    return trainingAvg, thetaAvg
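
# A minimal self-contained demo of the batch version; the coefficient
# vector, data, learning rate, and iteration count below are illustrative
# assumptions, not part of the original script.
if __name__ == "__main__":
    leadingCoefficient = np.array([2., -1., 0.5])
    nbParameters = len(leadingCoefficient)
    nbExamples = 100

    # Noise-free synthetic examples: outputs = inputs . leadingCoefficient
    rng = np.random.RandomState(0)
    inputs = rng.rand(nbExamples, nbParameters)
    outputs = np.dot(inputs, leadingCoefficient)

    trainingAvg, thetaAvg = computeIterationAvg(0.5, nbParameters, nbExamples)
    for _ in range(2000):
        # exOutputs is a T.row(), hence the (1, nbExamples) shape.
        trainingAvg(inputs, outputs.reshape(1, -1))

    # thetaAvg should now be close to leadingCoefficient.
    print("Batch estimate:", thetaAvg.get_value())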