# Source code for ast_toolbox.mcts.ASTSim

import ast_toolbox.mcts.MDP as MDP


class AcionSequence:
    """Structure storing an action sequence and a cursor into it.

    The class name keeps its original spelling (``AcionSequence``) so that
    existing references to it continue to resolve.

    Parameters
    ----------
    sequence : list
        The list of actions.
    index : int, optional
        The initial action index in the sequence.
    """

    def __init__(self, sequence, index=0):
        # The sequence is stored by reference (no copy is made).
        self.sequence = sequence
        # Position of the next action to hand out.
        self.index = index


def action_seq_policy(action_seq, s):
    """Return the next action of the sequence and advance its cursor.

    Parameters
    ----------
    action_seq : :py:class:`ast_toolbox.mcts.ASTSim.AcionSequence`
        The action sequence. Its ``index`` attribute is incremented by one
        on every successful call.
    s : :py:class:`ast_toolbox.mcts.AdaptiveStressTesting.ASTState`
        The AST state. Not used by this policy; presumably accepted only so
        the function matches the policy signature the simulator calls.

    Returns
    -------
    action : `ast_toolbox.mcts.AdaptiveStressTesting.ASTAction`
        The AST action stored at the current index.

    Raises
    ------
    IndexError
        If ``action_seq.index`` is past the end of a list-like sequence. The
        index is left unchanged in that case.
    """
    # Look the action up first so a failed lookup does not move the cursor.
    action = action_seq.sequence[action_seq.index]
    action_seq.index += 1
    return action


def play_sequence(ast, actions, verbose=False, sleeptime=0.0):
    """Roll out the given action sequence through the AST transition model.

    The actions are wrapped in an ``AcionSequence`` and replayed one by one
    via ``action_seq_policy`` inside ``MDP.simulate``.

    Parameters
    ----------
    ast : :py:class:`ast_toolbox.mcts.AdaptiveStressTesting.AdaptiveStressTest`
        The AST object. Only its ``transition_model`` attribute is read here.
    actions : list
        The action sequence.
    verbose : bool, optional
        Whether to log the rollout information (forwarded to ``MDP.simulate``).
    sleeptime : float, optional
        The pause time between each step (forwarded to ``MDP.simulate``).

    Returns
    -------
    rewards : list[float]
        The rewards.
    actions2 : list
        The action sequence of the path. Should be the same as the input actions.

    Raises
    ------
    AssertionError
        If the actions returned by the simulation differ from ``actions``.
        Being an ``assert``, this check is skipped when Python runs with ``-O``.
    """
    rewards, actions2 = MDP.simulate(
        ast.transition_model,
        AcionSequence(actions),
        action_seq_policy,
        verbose=verbose,
        sleeptime=sleeptime,
    )
    # Sanity check: the simulation should have replayed exactly the input.
    assert actions == actions2, "replayed actions differ from the input action sequence"
    return rewards, actions2