// implementation of the windy maze MDP	from Figure 6.10

#include "windy.h"
#include "minmax.h"
#include <fstream.h>
// Wind strength for each of the ten grid columns, indexed by column
// number.  execute() adds wind[s.n1] to the row coordinate s.n2 after
// every move.
int wind[10] = {
  0, 0, 0, 1, 1,
  1, 2, 2, 1, 0
};


// Simulate one deterministic step of the windy-grid MDP.
//
// The state is updated in place: the chosen action moves the column (s.n1)
// or the row (s.n2) by one cell, clamped to the grid, and then the wind for
// the resulting column is added to the row, which is clamped to NROWS - 1.
//
// Parameters:
//   act - action to perform; the program aborts if it is outside [0, 3].
//   s   - current state on entry, successor state on return.
//
// Returns 20.0 if the successor is the terminal cell (n1 == 7, n2 == 3),
// and -1.0 for every other step.
float WindyProblem::execute(WindyAction & act, WindyState & s)
{
  if (act < 0 || act > 3) {
    cerr << "Illegal action passed to WindyProblem::execute = " << act
         << endl;
    abort();
  }

  // apply the move, keeping the agent inside the grid
  switch (act) {
  case West:
    s.n1--;
    if (s.n1 < 0) s.n1 = 0;
    break;
  case East:
    s.n1++;
    if (s.n1 >= NCOLUMNS) s.n1 = NCOLUMNS - 1;
    break;
  case North:
    s.n2++;
    if (s.n2 >= NROWS) s.n2 = NROWS - 1;
    break;
  case South:
    s.n2--;
    if (s.n2 < 0) s.n2 = 0;
    break;
  }

  // apply the wind.  The entries of wind[] are never negative, so only
  // the upper row bound needs to be enforced here.
  // NOTE(review): the wind is looked up for the column reached after the
  // move, not the column the move started from -- confirm this is the
  // intended model.
  s.n2 += wind[s.n1];
  if (s.n2 >= NROWS) s.n2 = NROWS - 1;

  if (VERBOSE) {
    cout << s.n1 << " " << s.n2 << " action " << act << endl;
  }

  // reaching the terminal state earns a positive reward; every other
  // step costs one.
  if (s.n1 == 7 && s.n2 == 3) return 20.0;
  return -1.0;
}

// Record in the inverse (predecessor) model that taking `act` in
// `startState` leads to `endState` with the given probability.
//
// The predecessor list of endState is scanned first; if it already holds
// an entry for (startState, act) a diagnostic is written to cerr.  The new
// entry is appended to the list in either case.
void WindyProblem::UpdateInverseModel(WindyState & startState,
                                      WindyAction & act,
                                      WindyState & endState,
                                      float probability)
{
  typedef PredecessorInfo<WindyState, WindyAction> Predecessor;

  list<Predecessor *> & incoming = pi[stateIndex(endState)];

  // A duplicate is not expected, but report it if one turns up.
  listIterator<Predecessor *> scan(incoming);
  scan.init();
  while (!scan) {
    Predecessor * known = scan();
    if (known->state == startState && known->action == act) {
      cerr << "startState and act already exist	in model!";
      cerr << "	startState = " << stateIndex(startState)
           << "	act = " << act << endl;
      break;
    }
    ++scan;
  }

  Predecessor * entry = new Predecessor(startState, act, probability);
  incoming.add(entry);
}

// Read a WindyState written as "(n1 n2)" from a stream.
//
// A delimiter that is not the expected parenthesis is reported on cerr, but
// reading continues and the stream state is not changed by this function.
// Original note: the numbers in the input file begin with 1.  No offset is
// applied here; the values are stored exactly as read.
istream & operator >> (istream & str, WindyState & s)
{
  // Initialised so that a failed extraction (e.g. end of file) compares and
  // prints a defined value instead of an indeterminate one.
  char delim = '\0';
  str >> delim;
  if (delim != '(') {
    cerr << "Found `" << delim << "' where `(' was expected." << endl;
  }
  str >> s.n1;
  str >> s.n2;
  // reset so a failed read cannot reuse the previously read character
  delim = '\0';
  str >> delim;
  if (delim != ')') {
    cerr << "Found `" << delim << "' where `)' was expected." << endl;
  }
  return str;
}

// Write a WindyState to a stream as "(n1 n2)".
ostream & operator << (ostream & str, WindyState & s)
{
  return str << "(" << s.n1 << " " << s.n2 << ")";
}

void WindyProblem::buildModel()
{
  // Construct the forward model.  Actions are deterministic, so we
  // will use our simulator from above.
  WindyStateIterator itr;
  for (itr.init(); !itr; ++itr)	{
    // lookup the successor info list for this state
    list<SuccessorInfo<WindyState, WindyAction>	*> & siList =
      si[stateIndex(itr())];
    // for each	action
    for	(int a = 0; a <= 3; a++) {
      // execute the action to determine the next state	and reward
      WindyState resultState = itr();
      float reward = execute(a,	resultState);
      // update	the model
      SuccessorInfo<WindyState,	WindyAction> * succs = new
	SuccessorInfo<WindyState, WindyAction>(a);
      siList.add(succs);
      ResultInfo<WindyState> * rsi =
	new ResultInfo<WindyState>(1.0,	reward,	resultState);
      succs->resultStates.add(rsi);
    }
  }

  // now visit every state and update the inverse model
  for (itr.init(); !itr; ++itr)	{
    list<SuccessorInfo<WindyState, WindyAction>	*> *
      si = successors(itr());
    listIterator<SuccessorInfo<WindyState, WindyAction>	*>
      sitr(*si);
    for	(sitr.init(); !sitr; ++sitr) {
      listIterator<ResultInfo<WindyState> *> ritr(sitr()->resultStates);
      for (ritr.init();	!ritr; ++ritr) {
	UpdateInverseModel(itr(),
			   sitr()->action,
			   ritr()->state,
			   ritr()->probability);
      }
    }
  }
}
