/* MINDLE/PolynomialRegression/polynomialRegression.cpp */
/* @authors: Yann & Sam' */
#include <iostream>
#include <string>
#include <random>
#include <cmath> /* powl, pow, abs */
#include <eigen3/Eigen/Dense>
using namespace std;
using namespace Eigen;
#define FIRSTALPHA 0.01
#define NBEXAMPLES 1000
#define NBITERATIONS 1000
#define INTERVALWIDTH 20
#define INTERVALVALUESINF -10
#define INTERVALVALUESSUP 10
/*
NBPARAMETERS must be at least 2: with exactly 2 the model degenerates
to an affine regression, so 3 or more is needed for a true polynomial fit.
*/
#define NBPARAMETERS 7
#define NBITERATIONS_NEWTON 10
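/*
Hypothesis fitted below: h(x) = theta_0 + theta_1*x + ... + theta_{NBPARAMETERS-1}*x^{NBPARAMETERS-1}.
The synthetic data is generated from the true coefficients (1, 2, ..., NBPARAMETERS)
plus uniform noise in [-INTERVALWIDTH, INTERVALWIDTH] (see getRandomCoordinates).
*/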
void getRandomCoordinates(Matrix<long double, Dynamic, Dynamic>&, Matrix<long double, 1, NBEXAMPLES>&, Matrix<long double, 1, NBPARAMETERS>);
Matrix<long double, 1, NBPARAMETERS> getGradient(Matrix<long double, Dynamic, Dynamic>&, Matrix<long double, 1, NBEXAMPLES>&, Matrix<long double, 1, NBPARAMETERS> const &);
void featuresRange(long double[]);
void featuresAverage(long double[]);
void meanNormalization(Matrix<long double, Dynamic, Dynamic>&, long double[], long double[]);
Matrix <long double, Dynamic, Dynamic> getHessian(Matrix<long double, Dynamic, Dynamic>);
long double getJ(Matrix<long double, Dynamic, Dynamic>, Matrix<long double, 1, NBEXAMPLES>, Matrix<long double, 1, NBPARAMETERS>);
int main(int argc, char const *argv[])
{
int i;
/* We're just filling the 'leadingCoefficient' Matrix with values */
Matrix<long double, 1, NBPARAMETERS> leadingCoefficient;
for(i = 0; i < NBPARAMETERS; i++)
{
leadingCoefficient(0, i) = i + 1;
}
Matrix<long double, Dynamic, Dynamic> x(NBEXAMPLES, NBPARAMETERS);
Matrix<long double, 1, NBEXAMPLES> y;
getRandomCoordinates(x, y, leadingCoefficient);
std::string s;
if(argc == 1)
{
s = "graddesc";
}
else if(argc == 2)
{
s = argv[1];
}
else
{
std::cerr << "Too many arguments : expected 1 or 0 got " << argc - 1 << std::endl;
std::cerr << "Usage : ./bin/polynomialRegression.bin [graddesc|newton|normequ|compare]" << std::endl;
return 1;
}
if(s == "graddesc" || s == "compare")
{
long double range[NBPARAMETERS];
long double average[NBPARAMETERS];
featuresRange(range);
featuresAverage(average);
Matrix<long double, Dynamic, Dynamic> x_graddesc(x);
meanNormalization(x_graddesc, range, average);
Matrix<long double, 1, NBPARAMETERS> theta;
theta.setZero();
long double alpha(FIRSTALPHA);
Matrix<long double, 1, NBPARAMETERS> gradOld = getGradient(x_graddesc, y, theta);
Matrix<long double, 1, NBPARAMETERS> grad;
Matrix<long double, 1, NBPARAMETERS> gradDiff;
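/*
Adaptive step size: alpha is refreshed after each update with what appears to be
a Barzilai-Borwein-style rule. With s = theta_new - theta_old = -alpha * gradOld
and d = grad - gradOld, alpha_new = (s . d) / (d . d) expands to the expression
used in the loop below: alpha = -alpha * (gradOld . d) / (d . d).
*/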
for(i = 0; i < NBITERATIONS; i++)
{
theta -= alpha * gradOld;
grad = getGradient(x_graddesc, y, theta);
if(grad != gradOld)
{
gradDiff = grad - gradOld;
alpha = -alpha * gradOld.dot(gradDiff) / gradDiff.dot(gradDiff);
}
if(grad.isZero())
{
i++;
break;
}
gradOld = grad;
}
/* Undoes the mean normalization to recover theta for the original (unscaled) features */
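/*
Training used x~_j = (x_j - average_j) / range_j, so theta~ . x~ equals theta . x
with theta_j = theta~_j / range_j (for j >= 1) and
theta_0 = theta~_0 - sum_{j>=1} theta~_j * average_j / range_j.
*/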
for(int j(1); j < NBPARAMETERS; j++)
{
theta(0, 0) -= theta(0, j) * average[j] / range[j];
theta(0, j) /= range[j];
}
std::cout << "-------------------------------------------------------" << std::endl;
std::cout << " Gradient Descent" << std::endl;
std::cout << "-------------------------------------------------------" << std::endl;
std::cout << "Theta: " << theta << std::endl;
std::cout << "| Done in " << i << " iterations. |" << std::endl;
std::cout << "-------------------------------------------------------" << std::endl;
}
if(s == "newton" || s == "compare")
{
Matrix<long double, 1, NBPARAMETERS> theta;
theta.setOnes();
Matrix<long double, 1, NBPARAMETERS> grad;
Matrix<long double, Dynamic, Dynamic> hessian(NBPARAMETERS, NBPARAMETERS);
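/*
The squared-error cost is quadratic in theta, so its Hessian X^T X / m is
constant: in exact arithmetic one Newton step theta -= H^{-1} * grad already
reaches the optimum, and the remaining iterations only polish rounding error.
*/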
for(i = 0; i < NBITERATIONS_NEWTON; i++)
{
grad = getGradient(x, y, theta);
hessian = getHessian(x);
theta -= (hessian.inverse() * grad.transpose()).transpose();
if(grad.isZero())
{
i++;
break;
}
}
std::cout << "-------------------------------------------------------" << std::endl;
std::cout << " Newton's mehtod" << std::endl;
std::cout << "-------------------------------------------------------" << std::endl;
std::cout << "Theta: " << theta << std::endl;
std::cout << "| Done in " << i << " iterations. |" << std::endl;
std::cout << "-------------------------------------------------------" << std::endl;
}
if(s == "normequ" || s == "compare")
{
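/* Closed-form least-squares solution: theta = (X^T X)^{-1} X^T y */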
Matrix<long double, 1, NBPARAMETERS> theta = ((x.transpose() * x).inverse() * x.transpose() * y.transpose()).transpose();
std::cout << "-------------------------------------------------------" << std::endl;
std::cout << " Normal equation" << std::endl;
std::cout << "-------------------------------------------------------" << std::endl;
std::cout << "Theta: " << theta << std::endl;
std::cout << "-------------------------------------------------------" << std::endl;
}
if(s == "help")
{
std::cout << "Usage : ./bin/polynomialRegression.bin [graddesc|newton|normequ|compare]" << std::endl;
}
if(s != "graddesc" && s != "newton" && s != "normequ" && s != "compare" && s != "help")
{
std::cerr << "Unknown argument " << s << std::endl;
std::cerr << "Usage : ./bin/polynomialRegression.bin [graddesc|newton|normequ|compare]" << std::endl;
}
return 0;
}
void getRandomCoordinates(Matrix<long double, Dynamic, Dynamic> &x, Matrix<long double, 1, NBEXAMPLES> &y, Matrix<long double, 1, NBPARAMETERS> leadingCoefficient)
{
random_device rd;
mt19937 mt(rd());
/* We set a random engine for the inputs... */
uniform_real_distribution<double> random_bias_inputs(INTERVALVALUESINF, INTERVALVALUESSUP);
/* ... and another one for the outputs */
uniform_real_distribution<double> random_bias_outputs(-INTERVALWIDTH, INTERVALWIDTH);
for(int i(0); i < NBEXAMPLES; i++)
{
x(i, 0) = 1.0;
/* A 'x' generated within the defined interval */
x(i, 1) = random_bias_inputs(mt);
/* Raise this input to each successive power */
for(int j(2); j < NBPARAMETERS; j++)
{
x(i, j) = powl(x(i, 1), j);
}
/* Row i is now [1, x, x², x³, ...] */
y(0, i) = leadingCoefficient.dot(x.row(i)) + random_bias_outputs(mt);
}
}
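/* Batch gradient of the squared-error cost: grad_j = (1/m) * sum_i (theta . x_i - y_i) * x_ij */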
Matrix<long double, 1, NBPARAMETERS> getGradient(Matrix<long double, Dynamic, Dynamic> &x, Matrix<long double, 1, NBEXAMPLES> &y, Matrix<long double, 1, NBPARAMETERS> const &theta)
{
Matrix<long double, 1, NBPARAMETERS> result;
result.setZero();
for(int i(0); i < NBEXAMPLES; i++)
{
result += (theta.dot(x.row(i)) - y(0, i)) * (x.row(i));
}
result *= (1.0 / NBEXAMPLES);
return result;
}
/* Mean-normalizes the features: x <- (x - average) / range, bringing each roughly into [-1, 1] */
void meanNormalization(Matrix<long double, Dynamic, Dynamic> &x, long double range[], long double average[])
{
for(int i(0); i < NBEXAMPLES; i++)
{
for(int j(0); j < NBPARAMETERS; j++)
{
x(i, j) = (x(i, j) - average[j]) / range[j];
}
}
}
/* Computes the range (max - min over the input interval) of each feature */
void featuresRange(long double range[])
{
range[0] = 1; /* In order not to modify the x0 feature in meanNormalization */
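/* Odd powers: x^i is monotonically increasing on the interval, so its range is |sup^i - inf^i| */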
for(int i(1); i < NBPARAMETERS; i += 2)
{
range[i] = abs(pow(INTERVALVALUESSUP, i) - pow(INTERVALVALUESINF, i));
}
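/*
Even powers: if the interval straddles 0, the minimum of x^i is 0 and its range
is max(sup^i, inf^i); otherwise it is the difference between the two extremes.
*/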
for(int i(2); i < NBPARAMETERS; i += 2)
{
if(INTERVALVALUESSUP > 0 && INTERVALVALUESINF < 0)
range[i] = max(pow(INTERVALVALUESSUP, i), pow(INTERVALVALUESINF, i));
else
range[i] = (max(pow(INTERVALVALUESSUP, i), pow(INTERVALVALUESINF, i))
- min(pow(INTERVALVALUESSUP, i), pow(INTERVALVALUESINF, i)));
}
}
/* Computes the midrange ((min + max) / 2 over the input interval) of each feature, used as the centering value in meanNormalization */
void featuresAverage(long double average[])
{
average[0] = 0; /* In order not to modify the x0 feature in meanNormalization */
for(int i(1); i < NBPARAMETERS; i += 2)
{
average[i] = (pow(INTERVALVALUESSUP, i) + pow(INTERVALVALUESINF, i)) / 2;
}
for(int i(2); i < NBPARAMETERS; i += 2)
{
if(INTERVALVALUESSUP > 0 && INTERVALVALUESINF < 0)
/* The minimum of x^i is 0 here, so the midrange is max(sup^i, inf^i) / 2 */
average[i] = max(pow(INTERVALVALUESSUP, i), pow(INTERVALVALUESINF, i)) / 2;
else
average[i] = (pow(INTERVALVALUESSUP, i) + pow(INTERVALVALUESINF, i)) / 2;
}
}
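/* Hessian of the squared-error cost, independent of theta: H = X^T X / m */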
Matrix <long double, Dynamic, Dynamic> getHessian(Matrix<long double, Dynamic, Dynamic> x)
{
return (x.transpose() * x) / NBEXAMPLES;
}
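/* Squared-error cost J(theta) = 1/(2m) * sum_i (theta . x_i - y_i)^2 (unused in this file, kept for reference) */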
long double getJ(Matrix<long double, Dynamic, Dynamic> x, Matrix<long double, 1, NBEXAMPLES> y, Matrix<long double, 1, NBPARAMETERS> theta)
{
long double result(0);
for(int i(0); i < NBEXAMPLES; i++)
{
result += powl(theta.dot(x.row(i)) - y(0, i), 2);
}
return result / (2 * NBEXAMPLES);
}