/* This class describes a Markov decision process (MDP), including all of its state transitions. */

#ifndef _MDP_H_
#define _MDP_H_

#include <iosfwd>
#include <map>
#include <vector>

class QTable; // forward declaration only; the full definition lives in QTable.hpp

// Holds an MDP as a table of transitions: for every (state, action) pair
// there is a list of possible transitions, each with a probability, a
// destination state and a reward.
//
// NOTE(review): the data members are protected, which suggests subclassing
// is intended, yet the destructor is not virtual -- confirm that no derived
// object is ever deleted through an MDP pointer.
class MDP
{
public:
  MDP();
  ~MDP();

  // Checks the stored model and reports the outcome as a bool.
  // print_error: presumably enables diagnostic output when a problem is
  // found -- the implementation is not in this header, confirm there.
  bool verify(bool print_error = false);

  // Number of states / actions currently recorded for this MDP.
  int getNumStates() const { return num_states; }
  int getNumActions() const { return num_actions; }

  //any transitions to the same state are combined
  //the probabilities are added and the rewards are averaged
  void collapseTransitions();
  
  //we pass in the qtable to allow stuff like disabled actions
  //returns the number of DP iterations needed
  // print_status: presumably turns on progress output -- confirm in the
  // implementation.
  int solveByQTable(QTable& qt, bool print_status);
  
  // Stream output and input of a whole MDP. The textual format is defined
  // by the implementations, which are not part of this header.
  friend std::ostream& operator<<(std::ostream& o, const MDP& m);
  friend std::istream& operator>>(std::istream& i, MDP& m);

  // One possible outcome of taking an action in a state.
  struct TranInfo
  {
    float prob;     // probability of this transition
    int nextstate;  // state reached by this transition
    int reward;     // reward attached to this transition
  };

  // Strict ordering of transitions by destination state only; prob and
  // reward are ignored, so two transitions to the same state compare as
  // equivalent.
  struct lt_TranInfo_state
  {
    // Declared const so the comparator can be invoked through a const
    // object, as ordered containers and algorithms are allowed to do.
    bool operator()(const MDP::TranInfo& t1, const MDP::TranInfo& t2) const
    { return t1.nextstate < t2.nextstate; }
  };
  
  // All transitions available from a single (state, action) pair.
  typedef std::vector<TranInfo> StateActionInfo;

  // Returns the transition list stored for (state, action). The position is
  // obtained from getIdx(); no bounds check is performed here.
  const StateActionInfo& getStateActionInfo(int state, int action) const
  { return v_state_action_info[getIdx(state, action)]; }
  
protected:

  // Maps a (state, action) pair to a position in v_state_action_info.
  // Defined outside this header; per the note on that member the layout is
  // state * num_actions + action.
  unsigned int getIdx(int state, int action) const;
  
  int num_states;
  int num_actions;

  //this is indexed by state * num_actions + action
  typedef std::vector<StateActionInfo> StateActionInfoList;
  StateActionInfoList v_state_action_info;
  
};



#endif
