# Source code for ast_toolbox.mcts.MDP

import time

# def identity(*args):
#     if len(args) == 1:
#         return args[0]
#     return args


class TransitionModel:
    """The wrapper for the transition model used in the tree search.

    The constructor stores every argument unchanged as an attribute of the
    same name.

    Parameters
    ----------
    getInitialState : function
        getInitialState() returns the initial AST state.
    getNextState : function
        getNextState(s, a) returns the next state and the reward.
    isEndState : function
        isEndState(s) returns whether s is a terminal state.
    maxSteps : int
        The maximum path length.
    goToState : function
        goToState(s) sets the simulator to the target state s.
    """

    def __init__(self, getInitialState, getNextState, isEndState, maxSteps, goToState):
        self.getInitialState = getInitialState
        self.getNextState = getNextState
        self.isEndState = isEndState
        self.maxSteps = maxSteps
        self.goToState = goToState
def simulate(model, p, policy, verbose=False, sleeptime=0.0):
    """Simulate the environment model using the policy and the parameter p.

    Starting from ``model.getInitialState()``, the policy is queried for an
    action at every step, the model is advanced with that action, and the
    returned reward is accumulated. The rollout stops as soon as
    ``model.isEndState`` reports a terminal state or after
    ``model.maxSteps`` steps, whichever comes first.

    Parameters
    ----------
    model : :py:class:`ast_toolbox.mcts.MDP.TransitionModel`
        The environment model.
    p :
        The extra parameters needed by the policy.
    policy : function
        policy(p, s) returns the next action.
    verbose : bool, optional
        Whether to print the index of the last executed step when the
        simulation ends.
    sleeptime : float, optional
        The pause time between each step, passed to ``time.sleep``.

    Returns
    ----------
    cum_reward : float
        The cumulative reward.
    actions : list
        The action sequence of the path.
    """
    cum_reward = 0.0
    actions = []
    s = model.getInitialState()
    # Index of the last executed step. It stays at -1 when maxSteps is 0 so
    # the verbose report below never reads an unbound loop variable.
    i = -1
    for i in range(model.maxSteps):
        a = policy(p, s)
        actions.append(a)
        s, r = model.getNextState(s, a)
        time.sleep(sleeptime)
        cum_reward += r
        if model.isEndState(s):
            break
    if verbose:
        print("End at: ", i, " of ", model.maxSteps)
    return cum_reward, actions