//
// Jack's Auto Repair.	Solved via value iteration.
//
//
#include "jacks.h"
#include "mdp.h"
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>

// File-scope settings and counters.  They have external linkage, so other
// translation units / headers may refer to them by name.
int VERBOSE(0);        // non-zero enables verbose output; main() sets it to 1 for -v
int BackupCounter(0);  // running count of Q backups
int OldPopCounter(0);  // NOTE(review): not referenced in the visible code; meaning to be confirmed
int MCinterval(100);   // NOTE(review): presumably an interval for Monte Carlo evaluation -- confirm
int MCNSteps(10000);   // NOTE(review): presumably the number of steps per Monte Carlo run -- confirm
int MCpolicy(0);       // 0 selects the greedy policy

// Print the command-line synopsis, using the name the program was invoked as.
static void printUsage(const char *progName)
{
  cout << "Usage: " << progName << " [-e\tepsilon] [-v]" << endl;
  cout << "\t-e epsilon\t   set convergence tolerance" << endl;
  cout << "\t-v\t\t   turn\ton VERBOSE output" << endl;
}

// Program entry point.
//
// Parses the command line, runs value iteration on the Jack's problem MDP
// with the requested tolerance, and prints the resulting greedy policy twice:
// once as "n1 n2 action" rows (suitable for a plotting package) and once as
// a grid with 11 entries per row.
//
// Options:
//   -e epsilon   tolerance passed to MDP::ValueIteration (default 0.01)
//   -v           set the global VERBOSE flag to 1
//
// Returns 0 on success, 1 on a command-line error.
int main(int argc, char *argv[])
{
  JacksProblem problem;
  MDP<JackState, JackAction> mdp(&problem);

  // argv[0] may legitimately be absent; the literal is only a fallback label.
  const char *progName = (argc > 0 && argv[0] != 0) ? argv[0] : "jacksvi";

  float epsilon = 0.01;

  // getopt() returns int.  Storing the result in a char breaks the
  // comparison with -1 on platforms where plain char is unsigned, which
  // would make this loop spin forever.
  int optc;
  while ((optc = getopt(argc, argv, "e:v")) != -1) {
    switch (optc) {
    case 'e': {
      // strtod lets us tell "no number at all" apart from a genuine 0,
      // which atof cannot do.
      char *end = 0;
      const double parsed = strtod(optarg, &end);
      if (end == optarg) {
        cout << "invalid epsilon: " << optarg << endl;
        printUsage(progName);
        return 1;
      }
      epsilon = static_cast<float>(parsed);
      cout << "epsilon = " << epsilon << endl;
      break;
    }
    case 'v':
      VERBOSE = 1;
      cout << "verbose = 1" << endl;
      break;
    default:
      printUsage(progName);
      return 1;
    }
  }

  mdp.ValueIteration(epsilon);

  // Now print out the policy in two formats.  First, as a table for
  // input to gnuplot or other plotting package.
  JackStateIterator itr;
  for (itr.init(); !itr; ++itr) {
    cout << itr().n1 << " " << itr().n2 << " " << mdp.greedyPolicy(itr())
         << endl;
  }

  // Second, as a grid: break the line after every 11 entries.
  int column = 0;
  for (itr.init(); !itr; ++itr) {
    printf("%3d\t", mdp.greedyPolicy(itr()));
    column++;
    if (column >= 11) {
      cout << endl;
      column = 0;
    }
  }
  // Terminate a partially filled last row so the output always ends in a
  // newline.
  if (column != 0) {
    cout << endl;
  }
  return 0;
}
