# MINDLE/dev_null/Museum/learningRateNoTheano.py

#!/usr/bin/env python3


"""
@authors: Yann & Sam'
"""


import random

import numpy as np


"""
A simple program to try out several methods for easily finding
  a "good learning rate" at each iteration.
The whole program runs without Theano, because there was a period
  when we couldn't get it to work with Theano...
"""


###############################################################################


# That's the cost function
def cost(x, y, theta):

    return (np.dot(x, theta) - y) * (np.dot(x, theta) - y)


# Returns the average of all the values of the cost function for each examples
def costAverage(coordinates, theta):

    average = 0.

    for x, y in coordinates:

        average += (np.dot(x, theta) - y)**2

    return average / (2. * len(coordinates))


# Simple gradient computing
def gradient(x, y, theta):

    return np.dot((np.dot(x, theta) - y), x)


# Returns an "averaged" gradient computed with each training examples
def gradientAverage(coordinates, theta):

    average = 0.

    for x, y in coordinates:

        average += np.dot((np.dot(x, theta) - y), x)

    return average / len(coordinates)


# Generate random coordinates which we'll use as training examples
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients, nbExamples,
                                intervalWidth, intervalValues):

    coordinates = []

    for i in range(nbExamples):

        X = [1]

        for j in range(nbParameters):

            X.append(random.uniform(*intervalValues))

        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))
        coordinates.append((X, y))

    return coordinates


# This function updates alpha and theta for each iteration of
#   the gradient descent.
# It will use the 'usedCost' (either 'cost' or 'costAverage') and the
#   'usedGradient' (either 'gradient' or 'gradientAverage') to perform
#   the update.
# 'args' must correspond to the list of arguments needed specifically for
#   the 'usedCost' or the 'usedGradient' (meaning theta is not included
#   in that list).
# The 'flag' corresponds to the used method for updating alpha.
def update(flag, usedCost, usedGradient, args, theta, thetaOld, alpha):

    theta = theta - alpha * usedGradient(*args, theta)

    if flag == 'try':

        if usedCost(*args, thetaOld) <= usedCost(*args, theta):

            alpha = alpha * 0.5

        thetaOld = theta

    elif flag == 'Wolfe':

        grad = usedGradient(*args, theta)
        gradNext = usedGradient(*args, theta - alpha * grad)
        Jtheta = usedCost(*args, theta)
        JthetaNext = usedCost(*args, theta - alpha * grad)

        if -np.dot(grad, gradNext) >= -0.01 * np.dot(grad, grad) and \
                JthetaNext <= Jtheta - 0.0001 * alpha * np.dot(grad, grad):

            alpha = alpha * 0.5

    elif flag == 'B&B':

        grad = usedGradient(*args, theta)
        deltaGrad = usedGradient(*args, theta - alpha * grad) - grad

        if np.dot(-alpha * grad, deltaGrad) != 0 and \
                np.dot(deltaGrad, deltaGrad) != 0:

            alpha = np.dot(-alpha * grad, deltaGrad) / np.dot(deltaGrad,
                                                              deltaGrad)

    return alpha, theta


def main():

    # ############################## Parameters ###############################

    nbIterations = 100
    nbParameters = 1
    leadingCoefficients = [1, 2]
    nbExamples = 25
    intervalWidth = 0
    intervalValues = -10, 10

    ###########################################################################

    # ################################# Begin #################################

    coordinates = getRandomCoordinatesVectors(
                    nbParameters, leadingCoefficients, nbExamples,
                    intervalWidth, intervalValues)

    alpha = 0.1
    theta = np.array([49., 27.])
    thetaOld = np.array([0.1, 0.1])

    # Hard-coded for testing (change this if needed).
    flagMethod = 'B&B'
    flagGradient = 'avg'

    for i in range(nbIterations):

        if flagGradient == 'avg':  # 'avg' as in 'Average'

            alpha, theta = update(flagMethod, costAverage, gradientAverage,
                                  [coordinates], theta, thetaOld, alpha)
            thetaOld = theta

        elif flagGradient == 'ebe':  # 'ebe' as in 'example by example'
            for x, y in coordinates:

                alpha, theta = update(flagMethod, cost, gradient,
                                      [x, y], theta, thetaOld, alpha)
                thetaOld = theta

        if alpha == 0.:

            break

    print(theta)

    # ################################## End ##################################


# Run the experiment only when this file is executed as a script.
if __name__ == '__main__':
    main()