// implementation of the Jacks Auto Repair classes

#include "jacks.h"
#include "minmax.h"
#include <math.h>
extern "C" {
#include "ranlib.h"
}

// Reads a JackState written as "(n1 n2)" from str into s and returns str
// so that extractions can be chained.
//
// If the opening or closing parenthesis is not found -- which includes the
// case where the stream has failed or reached end-of-file -- a diagnostic
// is written to cerr and the program is aborted.
istream & operator >> (istream & str, JackState & s)
{
  // Initialised on purpose: a failed character extraction leaves its
  // target untouched, and comparing an indeterminate char is undefined.
  char opening = '\0';
  str >> opening;
  if (opening != '(') {
    cerr << "While reading JackState, expected '(' but read '" << opening << "'." << endl;
    abort();
  }

  str >> s.n1;
  str >> s.n2;

  // A separate, freshly initialised variable: if reading n1 or n2 put the
  // stream into a failed state, the extraction below does nothing, and the
  // diagnostic must not report the earlier '(' as the character read here.
  char closing = '\0';
  str >> closing;
  if (closing != ')') {
    cerr << "while reading JackState, expected ')' but read '" <<
      closing << "'." << endl;
    abort();
  }
  return str;
}

// Writes s to str as "(n1 n2)": an opening parenthesis, the two car
// counts separated by a single space, and a closing parenthesis.
// Returns str so that insertions can be chained.
ostream & operator << (ostream & str, JackState & s)
{
  str << "(";
  str << s.n1 << " " << s.n2;
  str << ")";
  return str;
}

// Returns n! as an int.  Any n <= 1 (including negative values) yields 1.
// The result only fits in a 32-bit int for n <= 12.
int factorial(int n)
{
  int product = 1;
  // Multiply upwards 2, 3, ..., n; the loop body never runs when n <= 1.
  for (int k = 2; k <= n; ++k) {
    product *= k;
  }
  return product;
}

// Returns the Poisson probability mass P(X = n) = mean^n / n! * e^(-mean).
//
//   n    - the count whose probability is wanted; for n <= 1 the factorial
//          term is 1
//   mean - the mean of the distribution
//
// The factorial is accumulated in a double rather than an int: n! exceeds
// the range of a 32-bit int from n = 13 onwards, which would otherwise
// overflow and produce meaningless probabilities.  For n <= 12 the double
// holds exactly the same value an int factorial would.
double poisson(int n, float mean)
{
  double nFactorial = 1.0;
  for (int k = 2; k <= n; ++k) {
    nFactorial *= k;
  }
  return ( (pow(mean, n) / nFactorial) * exp(- mean) );
}

// Simulates one day at a single location and accumulates, for every
// possible end-of-day car count c in 0..MAXCARS:
//
//   p[c]      - the probability of finishing the day with c cars
//   reward[c] - the expected rental income GIVEN that the day ends with
//               c cars
//
// Both vectors are only ever added to, never cleared, so the caller must
// pass them in zero-filled and indexable up to MAXCARS.
void forwardSimulate(vector<double> & p,
                     vector<double> & reward,
                     int n,   // starting number of cars
                     int meanDemand,  // demand for new rentals
                     int meanReturns) // returns
{
  // Demand and returns are treated as independent Poisson variables with
  // the given means, so the probability of a (demand, returns) pair is
  // the product of the two individual probabilities.
  //
  // A Poisson variable can take any natural number as its value, but only
  // 0..MAXCARS is enumerated.  Each loop also stops as soon as it is past
  // the mean and the probability has fallen below 0.001.  The skipped
  // tail is simply dropped, so p need not sum to exactly 1.
  //
  // Rentals are limited by the starting count n alone: cars returned
  // during the day increase the end-of-day count but are not counted as
  // available for rental on that day.
  //
  // Different (demand, returns) pairs can leave the same number of cars
  // while earning different amounts.  The first pass therefore stores the
  // probability-weighted income in reward[]; the second pass divides by
  // p[] to turn that into a conditional expectation.
  for (int requested = 0; requested <= MAXCARS; ++requested) {
    double pRequested = poisson(requested, meanDemand);
    if (requested > meanDemand && pRequested < 0.001) break;

    // Does not depend on the number of returns, so compute it once here.
    int rentedOut = min(n, requested);

    for (int broughtBack = 0; broughtBack <= MAXCARS; ++broughtBack) {
      double pBroughtBack = poisson(broughtBack, meanReturns);
      if (broughtBack > meanReturns && pBroughtBack < 0.001) break;

      double joint = pRequested * pBroughtBack;
      // Anything above MAXCARS is clamped to MAXCARS.
      int leftOver = min(n + broughtBack - rentedOut, MAXCARS);

      p[leftOver] += joint;
      // $10 of income for every car rented out.
      reward[leftOver] += joint * rentedOut * 10;
    }
  }

  // Condition the weighted income on the end-of-day count.  Counts that
  // were never reached keep a reward of whatever was passed in.
  for (int count = 0; count <= MAXCARS; ++count) {
    if (p[count] > 0.0) {
      reward[count] /= p[count];
    }
  }
}


// Records, in the predecessor list kept for endState, that taking act in
// startState leads to endState with the given probability.
//
// The list is scanned first for an entry with the same start state and
// action.  Finding one is only reported on cerr; the new record is
// appended to the list regardless.
void JacksProblem::UpdateInverseModel(JackState & startState,
                                      JackAction & act,
                                      JackState & endState,
                                      float probability)
{
  typedef PredecessorInfo<JackState, JackAction> Predecessor;

  list<Predecessor *> & predecessors = pi[stateIndex(endState)];

  // This pair is not expected to be present already, but check anyway
  // and stop scanning at the first match.
  listIterator<Predecessor *> scan(predecessors);
  scan.init();
  while (!scan) {
    const bool sameState = (scan()->state == startState);
    if (sameState && scan()->action == act) {
      cerr << "startState and act already exist	in model!";
      cerr << "	startState = " << stateIndex(startState)
           << "	act = " << act << endl;
      break;
    }
    ++scan;
  }

  Predecessor * entry = new Predecessor(startState, act, probability);
  predecessors.add(entry);
}

void JacksProblem::buildModel()
{
  // for each state, we	generate all possible actions, and for each
  // action, all possible successor states.  We	then update the
  // inverse model.
  JackStateIterator itr;

  for (itr.init(); !itr; ++itr)	{
    // lookup the list for this	state
    list<SuccessorInfo<JackState, JackAction> *> & siList =
      si[stateIndex(itr())];
    for	(int a = -MAXMOVE; a <=	MAXMOVE; a++) {
      // the resulting state would be [(n1 - a), (n2 + a)].  check
      // that this would be legal.
      JackState	result(itr().n1	- a, itr().n2 +	a);
      if (result.n1 < 0	||
	  result.n1 > MAXCARS ||
	  result.n2 < 0	||
	  result.n2 > MAXCARS) continue;

      // ok, it	is a legal action.  Allocate a successor info record
      // for this action.  And add it onto siList.
      SuccessorInfo<JackState, JackAction> * succs = new
	SuccessorInfo<JackState, JackAction>(a);
      siList.add(succs);

      // Now for the tricky part.  We must compute the probability of
      // each possible result state.  A	result state can arise from
      // many combinations of returns and demands at the two locations.
      // We will forward compute each reachable	number of cars at each
      // location and increment	these probabilities.  Finally, we can
      // take the outer-product	of these two vectors to	get the	result
      // state probabilities.

      vector<double> p1(MAXCARS	+ 1, 0.0);  // p1[n]: probability of n cars at loc 1
      vector<double> reward1(MAXCARS + 1, 0.0);
      vector<double> p2(MAXCARS	+ 1, 0.0);  // p2[n]: probability of n cars at loc 2
      vector<double> reward2(MAXCARS + 1, 0.0);

      forwardSimulate(p1, reward1, result.n1, 3, 3);
      forwardSimulate(p2, reward2, result.n2, 4, 2);

      for (int n1 = 0; n1 < MAXCARS + 1; n1++) {
	if (p1[n1] <= 0.0) continue;
	for (int n2 = 0; n2 < MAXCARS +	1; n2++) {
	  if (p2[n2] <=	0.0) continue;

	  JackState resultState(n1, n2);

	  float	probability = p1[n1] * p2[n2];

	  ResultInfo<JackState>	* rsi =
	    new	ResultInfo<JackState>(probability,
				      reward1[n1] + reward2[n2]	- 2 * abs(a),
				      resultState);
	  succs->resultStates.add(rsi);

	  // now update	the inverse model.
	  UpdateInverseModel(itr(), a, resultState, probability);
	}
      }
    }
    cout << "Finished model for	state "	<< itr().n1 << " " << itr().n2 << endl;
    if (VERBOSE) cout << siList	<< endl;

  }
}

// Draws one random value from ignpoi (presumably the Poisson generator
// declared in ranlib.h), passing mean converted to float, and returns the
// result converted to int.
int rpoisson(unsigned int mean)
{
  return static_cast<int>(ignpoi(static_cast<float>(mean)));
}

// Samples one day at a single location.
//
// Random demand and returns are drawn with rpoisson (demand first, then
// returns).  Rentals are capped by the starting count n, reward is set to
// $10 per car rented, and the return value is the end-of-day car count
// clamped to MAXCARS.
int carsRemaining(int n,   // starting number of cars
                  int meanDemand,  // demand for new rentals
                  int meanReturns, // returns
                  float & reward)
{
  // The order of these two draws is significant for the random sequence.
  int wanted = rpoisson(meanDemand);
  int broughtBack = rpoisson(meanReturns);

  // No more cars can be rented than were on the lot at the start.
  int rentedOut = min(n, wanted);
  reward = rentedOut * 10.0;

  int leftOver = n + broughtBack - rentedOut;
  return min(leftOver, MAXCARS);
}

// Simulates one step of the problem: applies action a to state s, samples
// a day of rentals and returns at each location, and overwrites s with
// the resulting state.
//
//   a - number of cars moved from location 1 to location 2; a negative
//       value moves cars from location 2 to location 1
//   s - in: the state before the move; out: the sampled end-of-day state
//
// Returns the rental income of both locations minus $2 for every car
// moved.  The charge uses the magnitude of a, the same cost buildModel()
// applies, so a move in the negative direction is charged rather than
// credited.
float JacksProblem::execute(JackAction & a, JackState & s)
{
  float reward1 = 0;
  float reward2 = 0;
  // The demand/return means (3, 3) and (4, 2) are the same values
  // buildModel() passes to forwardSimulate for the two locations.
  s.n1 = carsRemaining(s.n1 - a, 3, 3, reward1);
  s.n2 = carsRemaining(s.n2 + a, 4, 2, reward2);
  // The cost depends on how many cars were moved, not on the direction.
  const int carsMoved = (a < 0) ? -a : a;
  return reward1 + reward2 - (2 * carsMoved);
}
