#!/usr/bin/env python3

"""
@authors: Yann & Sam'
"""

import random

import numpy as np

import theano
import theano.tensor as T

"""
|
|
The goal of this program is to make a linear regression for any number of
|
|
parameters in the input.
|
|
"""
|
|
|
|
|
|
"""
|
|
Due to our moving from Theano to TensorFlow, we've stopped the development of
|
|
this program.
|
|
Theano didn't give us the possibility of get the gradient's value anytime
|
|
during the training.
|
|
"""

###############################################################################

"""
|
|
This function generates 'nbExamples' tuples of coordinates.
|
|
The first member being the values of the input and the second member being the
|
|
output corresponding to the input, to which is added some noise of variance
|
|
'intervalWidth'. The output is linear in each parameters of the input.
|
|
'leadingCoefficients' contains all the slopes.
|
|
"""


def getRandomCoordinatesVectors(nbParameters, leadingCoefficients,
                                nbExamples, intervalWidth):

    # 'coordinates' will be created in this way:
    # [([x0, ..., xn], y0), ..., ([x0, ..., xn], ym)]
    # (with 'n' the number of parameters, and 'm' the number of examples)
    coordinates = []

    for i in range(nbExamples):

        X = [1]

        for j in range(nbParameters):

            # Creates the input parameters with random values for
            # each dimension.
            X.append(random.uniform(-abs(nbExamples), abs(nbExamples)))

        # Creates the output corresponding to the input.
        y = np.dot(X, leadingCoefficients) + \
            random.uniform(-abs(intervalWidth), abs(intervalWidth))
        # --> 'np.dot()': given two 1-D vectors, returns the inner product of
        #     the two vectors.

        coordinates.append((X, y))

    return coordinates
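

# A minimal usage sketch (added for illustration; this helper is hypothetical
# and is never called by the program). With 2 parameters, coefficients
# [1, 2, 3] (intercept first) and no noise, every generated pair must satisfy
# y = 1 + 2 * x1 + 3 * x2 exactly.
def _exampleGetRandomCoordinatesVectors():

    samples = getRandomCoordinatesVectors(2, [1, 2, 3], 5, 0)

    for X, y in samples:

        # Without noise, the output matches the inner product exactly.
        assert abs(np.dot(X, [1, 2, 3]) - y) < 1e-9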


def main():

    # ############################## Parameters ##############################

    nbIterations = 1000

    leadingCoefficients = [i for i in range(1, 4)]
    nbParameters = len(leadingCoefficients) - 1
    nbExamples = 75
    intervalWidth = 0
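
    # (With the values above, the target model is y = 1 + 2 * x1 + 3 * x2,
    # and the generated data is noise-free since 'intervalWidth' is 0.)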

    ###########################################################################

    coordinates = getRandomCoordinatesVectors(nbParameters,
                                              leadingCoefficients,
                                              nbExamples,
                                              intervalWidth)

    x, y = zip(*coordinates)

    x = list(x)
    y = list(y)

    # ########################### THEANO BLACK BOX ############################

    X = T.matrix()
    Y = T.row()

    # First value of 'alpha'. Actually, whatever it is, this algorithm should
    # always converge.
    alpha = theano.shared(np.asarray(1.))
    # At the beginning, all the parameters are equal to '0'
    theta = theano.shared(np.zeros(len(leadingCoefficients)))

    # Only the first gradient needs to be computed manually
    # (its value is used for (and only for) the first iteration)
    gradientOld = theano.shared(
        np.asarray(
            sum(
                [np.dot(-y[i], x[i]) for i in range(nbExamples)]
            ) / float(nbExamples)
        )
    )
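
    # (Sanity note, added for clarity: at 'theta = 0' the gradient of the
    # cost is (1 / m) * sum_i (0 - y_i) * x_i, which is exactly the
    # expression above.)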

    h = T.dot(X, theta)  # <-- Model

    diff = h - Y
    Jtheta = T.tensordot(diff, diff) / (2 * nbExamples)
    gradient = T.grad(cost=Jtheta, wrt=theta)
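
    # (Written out, the cost above is J(theta) = (1 / (2 * m)) *
    # sum_i (theta . x_i - y_i)^2, the usual halved mean squared error;
    # 'T.grad()' then derives its gradient symbolically.)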

    # Here are the training updates; the 'alpha' one is the Barzilai &
    # Borwein step-size rule.
    updateTheta = [theta, theta - alpha * gradient]
    updateAlpha = [alpha,
                   theano.tensor.dot(-alpha * gradientOld,
                                     gradient - gradientOld) /
                   theano.tensor.dot(gradient - gradientOld,
                                     gradient - gradientOld)]

    updateGradOld = [gradientOld, gradient]
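
    # (Derivation note, added for clarity: Barzilai & Borwein set
    # alpha_new = (s . d) / (d . d), with 's' the last parameter step and
    # 'd' = gradient - gradientOld the last gradient change. Here the last
    # step is written as -alpha * gradientOld, i.e. the current 'alpha'
    # stands in for the one that actually produced that step.)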

    training = theano.function(inputs=[X, Y],
                               outputs=Jtheta,
                               updates=[updateTheta,
                                        updateAlpha,
                                        updateGradOld],
                               allow_input_downcast=True)
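
    # (Each call to 'training' evaluates 'Jtheta' and then applies the three
    # updates simultaneously, i.e. every right-hand side is computed from the
    # pre-update shared values.)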

    ###########################################################################

    for i in range(nbIterations):

        # To avoid 'nan', there is a break condition for the training loop:
        # it stops when the gradient becomes too small (which means that
        # we're reaching the "good" parameter values)...
        if np.linalg.norm(gradientOld.get_value()) < 1.e-4:

            break

        # 'y' is wrapped in a 'list' so that it has the row shape
        # expected by 'Y'
        training(x, [y])

    print(theta.get_value())
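

# A plain-NumPy sketch of the same training loop (added for illustration;
# this helper is hypothetical and is never called by the program). It makes
# the "black box" explicit: the same simultaneous updates as above, with the
# gradient written by hand instead of derived by Theano. The first step is a
# plain gradient step, since the Barzilai & Borwein rule needs two successive
# gradients before it can produce a step size.
def _numpyReference(x, y, nbIterations=1000):

    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    m = len(y)
    theta = np.zeros(x.shape[1])
    alpha = 1.

    # Gradient of J(theta) = ||x . theta - y||^2 / (2 * m) at the starting
    # point, followed by a first plain gradient step.
    gradientOld = x.T.dot(x.dot(theta) - y) / m
    theta = theta - alpha * gradientOld

    for _ in range(nbIterations):

        gradient = x.T.dot(x.dot(theta) - y) / m

        if np.linalg.norm(gradient) < 1.e-4:
            break

        diff = gradient - gradientOld

        # Mirror the three simultaneous Theano updates: the 'theta' step uses
        # the pre-update 'alpha', and the new 'alpha' reads the pre-update
        # 'alpha' and 'gradientOld' (both still unmodified at this point).
        theta = theta - alpha * gradient
        alpha = np.dot(-alpha * gradientOld, diff) / np.dot(diff, diff)
        gradientOld = gradient

    return theta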


if __name__ == '__main__':
    main()