Source code for ast_toolbox.mcts.ASTSim

import ast_toolbox.mcts.MDP as MDP


[docs]class AcionSequence: """Sturcture storing the actions sequences. Parameters ---------- sequence : list The list of actions. index : int, optional The initial action index in the sequence. """ def __init__(self, sequence, index=0): self.sequence = sequence self.index = index
[docs]def action_seq_policy(action_seq, s): """The policy wrapper for the action sequence. Parameters ---------- action_seq : :py:class:`ast_toolbox.mcts.ASTSim.AcionSequence` The action sequence. s : :py:class:`ast_toolbox.mcts.AdaptiveStressTesting.ASTState` The AST state. Returns ---------- action : `ast_toolbox.mcts.AdaptiveStressTesting.ASTAction` The AST action. """ action = action_seq.sequence[action_seq.index] action_seq.index += 1 return action
[docs]def play_sequence(ast, actions, verbose=False, sleeptime=0.0): """Rollout the action sequence. Parameters ---------- ast : :py:class:`ast_toolbox.mcts.AdaptiveStressTesting.AdaptiveStressTest` The AST object. actions : list The action sequence. verbose : bool, optional Whether to log the rollout information. sleeptime: float, optional The pause time between each step. Returns ---------- rewards : list[float] The rewards. actions2 : list The action sequence of the path. Should be the same as the input actions. """ rewards, actions2 = MDP.simulate(ast.transition_model, AcionSequence(actions), action_seq_policy, verbose=verbose, sleeptime=sleeptime) assert actions == actions2 return rewards, actions2