#!/usr/bin/env python3
"""
Theano implementation of linear regression trained by gradient descent:
a stochastic version (one example per update) and a batch version
(averaging over all the examples).

@authors: Yann
"""

import numpy as np
import theano
import theano.tensor as T


def computeIteration(alpha, nbParameters):
    """Build a Theano training function performing one gradient descent
    update per (input, output) example, with learning rate 'alpha'.

    Returns the compiled function and the shared parameter theta.
    """
    if nbParameters == 1:
        exInput = T.scalar()  # One example input
    else:
        exInput = T.vector()  # Same with a vector for multiple parameters

    exOutput = T.scalar()  # The corresponding output

    # The hypothesis on the leading coefficient
    theta = theano.shared(np.zeros(nbParameters))

    if nbParameters == 1:
        # The hypothesis on the output given the input 'exInput'
        h = theta * exInput
    else:
        h = T.dot(theta, exInput)

    # The cost function representing the difference between the hypothesis
    # and the real output.
    Jtheta = T.mean(T.sqr(h - exOutput))
    gradient = T.grad(cost=Jtheta, wrt=theta)  # Gradient of the cost function

    # The goal is to find the theta minimizing Jtheta. This theta will be
    # the closest to the 'leadingCoefficient'. To do so, we use the gradient
    # descent method. The next line is the update made on theta for each
    # iteration of the gradient descent.
    update = [theta, theta - alpha * gradient]

    # Creates the training function that makes all updates for a gradient
    # descent iteration given an input and the corresponding output.
    training = theano.function(inputs=[exInput, exOutput],
                               outputs=Jtheta,
                               updates=[update],
                               allow_input_downcast=True)

    return training, theta

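# A minimal usage sketch for computeIteration (illustrative only: the
# learning rate, the data, and the true coefficient 3.0 are assumptions,
# not part of this file):
#
#     training, theta = computeIteration(alpha=0.1, nbParameters=1)
#     for x in np.random.rand(1000):
#         cost = training(x, 3. * x)  # one stochastic update per example
#     print(theta.get_value())        # should be close to [3.0]
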
def computeIterationAvg(alpha, nbParameters, nbExamples):
    """Same as computeIteration, but each update averages the gradient
    over all 'nbExamples' examples (batch gradient descent).

    Returns the compiled function and the shared parameter thetaAvg.
    """
    if nbParameters == 1:
        exInputs = T.row()  # List of all the examples' inputs
    else:
        exInputs = T.matrix()  # Same, but a matrix for multiple parameters

    exOutputs = T.row()  # The corresponding list of all the examples' outputs

    if nbParameters == 1:
        # The hypothesis on the leading coefficient
        thetaAvg = theano.shared(np.asarray(0.))
    else:
        # Same but for a vectorized implementation
        thetaAvg = theano.shared(np.zeros(nbParameters))

    # The hypothesis on the list of outputs given the list of inputs.
    hAvg = T.dot(exInputs, thetaAvg)

    diff = hAvg - exOutputs

    # Cost function representing the average of all the differences
    # (one difference being a member of the list 'diff').
    JthetaAvg = T.tensordot(diff, diff) / (2. * nbExamples)

    # Gradient of the cost function
    gradientAvg = T.grad(cost=JthetaAvg, wrt=thetaAvg)

    # Same as for the previous implementation of the gradient descent.
    update = [thetaAvg, thetaAvg - alpha * gradientAvg]

    # Same as for the previous implementation of the gradient descent.
    trainingAvg = theano.function(inputs=[exInputs, exOutputs],
                                  outputs=JthetaAvg,
                                  updates=[update],
                                  allow_input_downcast=True)

    return trainingAvg, thetaAvg
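
# A minimal self-contained demo of the batch version; the coefficient
# vector, data, learning rate, and iteration count below are illustrative
# assumptions, not part of the original script.
if __name__ == "__main__":
    leadingCoefficient = np.array([2., -1., 0.5])
    nbParameters = len(leadingCoefficient)
    nbExamples = 100

    # Noise-free synthetic examples: outputs = inputs . leadingCoefficient
    rng = np.random.RandomState(0)
    inputs = rng.rand(nbExamples, nbParameters)
    outputs = np.dot(inputs, leadingCoefficient)

    trainingAvg, thetaAvg = computeIterationAvg(0.5, nbParameters, nbExamples)
    for _ in range(2000):
        # exOutputs is a T.row(), hence the (1, nbExamples) shape.
        trainingAvg(inputs, outputs.reshape(1, -1))

    # thetaAvg should now be close to leadingCoefficient.
    print("Batch estimate:", thetaAvg.get_value())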