#!/usr/bin/env python3
"""
@authors: Yann & Sam'
"""
import random
import numpy as np
import theano
import theano.tensor as T
"""
The goal of this program is to make a linear regression for any number of
parameters in the input.
"""
"""
Due to our moving from Theano to TensorFlow, we've stopped the development of
this program.
Theano didn't give us the possibility of get the gradient's value anytime
during the training.
"""
###############################################################################
"""
This function generates 'nbExamples' tuples of coordinates.
The first member being the values of the input and the second member being the
output corresponding to the input, to which is added some noise of variance
'intervalWidth'. The output is linear in each parameters of the input.
'leadingCoefficients' contains all the slopes.
"""
def getRandomCoordinatesVectors(nbParameters, leadingCoefficients,
                                nbExamples, intervalWidth):
    # 'coordinates' will be created in this way:
    # [([x0, ..., xn], y0), ..., ([x0, ..., xn], ym)]
    # (with 'n' the number of parameters, and 'm' the number of examples)
    coordinates = []
    for i in range(nbExamples):
        X = [1]
        for j in range(nbParameters):
            # Creates the input parameters with random values for
            # each dimension.
            X.append(random.uniform(-abs(nbExamples), abs(nbExamples)))
        # Creates the output corresponding to the input.
        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))
        # --> 'np.dot()': given two 1-D vectors, returns the inner product of
        #     the two vectors.
        coordinates.append((X, y))

    return coordinates
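
# A minimal usage sketch (illustration only, not part of the original script):
# with leadingCoefficients = [1, 2, 3] (so nbParameters = 2) and
# intervalWidth = 0, each generated example is ([1, x1, x2], y) with
# y = 1 + 2 * x1 + 3 * x2 exactly.
#
#     sample = getRandomCoordinatesVectors(2, [1, 2, 3], 1, 0)
#     # sample == [([1, x1, x2], 1 + 2 * x1 + 3 * x2)]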


def main():
    # ############################## Parameters ##############################
    nbIterations = 1000
    leadingCoefficients = [i for i in range(1, 4)]
    nbParameters = len(leadingCoefficients) - 1
    nbExamples = 75
    intervalWidth = 0
    ###########################################################################

    coordinates = getRandomCoordinatesVectors(nbParameters,
                                              leadingCoefficients,
                                              nbExamples,
                                              intervalWidth)
    x, y = zip(*coordinates)
    x = list(x)
    y = list(y)
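
    # After the 'zip', 'x' is a list of nbExamples input vectors (each of
    # length nbParameters + 1, because of the leading 1 used as a bias term)
    # and 'y' is the list of the nbExamples corresponding outputs.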
    # ########################### THEANO BLACK BOX ############################
    X = T.matrix()
    Y = T.row()

    # First value of 'alpha'. Actually, whatever it is, this algorithm should
    # always converge.
    alpha = theano.shared(np.asarray(1.))
    # At the beginning, all parameters are equal to '0'.
    theta = theano.shared(np.zeros(len(leadingCoefficients)))
    # Only the first gradient needs to be computed manually
    # (its value is used for (and only for) the first iteration).
    gradientOld = theano.shared(
        np.asarray(
            sum(
                [np.dot(-y[i], x[i]) for i in range(nbExamples)]
            ) / float(nbExamples)
        )
    )

    h = T.dot(X, theta)  # <-- Model
    diff = h - Y
    Jtheta = T.tensordot(diff, diff) / (2 * nbExamples)
    gradient = T.grad(cost=Jtheta, wrt=theta)
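
    # In matrix form (with m = nbExamples), the cost above is the usual
    # least-squares objective and 'T.grad' builds its symbolic gradient:
    #     J(theta)      = ||X.theta - y||^2 / (2 * m)
    #     grad J(theta) = X^T (X.theta - y) / m
    # Note that the initial value of 'gradientOld' above is exactly this
    # gradient evaluated at theta = 0, i.e. -X^T y / m.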
    # Here are the training updates; the one for 'alpha' is the
    # Barzilai & Borwein implementation.
    updateTheta = [theta, theta - alpha * gradient]
    updateAlpha = [alpha,
                   theano.tensor.dot(-alpha * gradientOld,
                                     gradient - gradientOld) /
                   theano.tensor.dot(gradient - gradientOld,
                                     gradient - gradientOld)]
    updateGradOld = [gradientOld, gradient]
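
    # The 'alpha' update is a Barzilai-Borwein step size: with
    # s = theta_new - theta_old = -alpha * gradientOld and
    # d = gradient - gradientOld, the new step is
    #     alpha_new = (s . d) / (d . d)
    # which is exactly the expression used in 'updateAlpha' above.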
    training = theano.function(inputs=[X, Y],
                               outputs=Jtheta,
                               updates=[updateTheta,
                                        updateAlpha,
                                        updateGradOld],
                               allow_input_downcast=True)
    ###########################################################################
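
    # Note (standard Theano semantics, not something specific to this script):
    # each call to 'training(x, [y])' evaluates Jtheta and the update
    # expressions with the current values of the shared variables, then applies
    # all three updates at once. So one call is one full-batch gradient step
    # together with its Barzilai-Borwein step-size refresh.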
    for i in range(nbIterations):
        # To avoid 'nan', there is a break condition for the training loop:
        # stop when the gradient becomes too small (it means that we're
        # reaching the "good" parameter values)...
        if np.linalg.norm(gradientOld.get_value()) < 1.e-4:
            break
        # 'y' is wrapped in a list for a dimensional purpose ('Y' is a row,
        # so it expects a 2-D input).
        training(x, [y])

    print(theta.get_value())
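    # With intervalWidth = 0 (no noise on the generated data), 'theta' should
    # converge to 'leadingCoefficients', i.e. roughly [1., 2., 3.] here.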


if __name__ == '__main__':
    main()