# Source code for ast_toolbox.rewards.example_av_reward

"""An example implementation of an ASTReward for an AV validation scenario."""
import numpy as np  # useful packages for math

from ast_toolbox.rewards import ASTReward  # import base class


# Define the class, inherit from the base
class ExampleAVReward(ASTReward):
    """An example implementation of an ASTReward for an AV validation scenario.

    The reward returned by `give_reward` has three regimes: zero when the goal (a crash) is
    reached, a large negative penalty when the trajectory terminates without reaching the goal,
    and otherwise the negative Mahalanobis distance of the action from the zero mean action.

    Parameters
    ----------
    num_peds : int
        The number of pedestrians in the scenario.
    cov_x : float
        Covariance of the x-acceleration.
    cov_y : float
        Covariance of the y-acceleration.
    cov_sensor_noise : float
        Covariance of the sensor noise.
    use_heuristic : bool
        Whether to include a heuristic in the reward based on how close the pedestrian is to the vehicle
        at the end of the trajectory.
    """

    # Magnitude of the fixed penalty applied when the horizon is reached without a crash.
    _HORIZON_PENALTY = 100000
    # Multiplier applied to the closest pedestrian-to-car distance in the terminal penalty.
    _HEURISTIC_WEIGHT = 10000

    def __init__(self,
                 num_peds=1,
                 cov_x=0.1,
                 cov_y=0.01,
                 cov_sensor_noise=0.1,
                 use_heuristic=True):

        self.c_num_peds = num_peds
        self.c_cov_x = cov_x
        self.c_cov_y = cov_y
        self.c_cov_sensor_noise = cov_sensor_noise
        self.use_heuristic = use_heuristic
        super().__init__()

    def give_reward(self, action, **kwargs):
        """Returns the reward for a given time step.

        Parameters
        ----------
        action : array_like
            Action taken by the AST solver.
        kwargs :
            Accepts relevant info for computing the reward. Must contain an ``info`` mapping with
            the keys ``is_goal`` and ``is_terminal``; ``peds`` and ``car`` are additionally read
            when the trajectory is terminal and the heuristic is enabled.

        Returns
        -------
        reward : float
            Reward based on the previous action.

        Raises
        ------
        KeyError
            If ``info`` (or one of the entries needed for the current branch) is missing.
        """
        # get the info from the simulator
        info = kwargs['info']

        if info["is_goal"]:
            # We found a crash
            return 0

        if info["is_terminal"]:
            # We reached the horizon with no crash
            if self.use_heuristic:
                # Heuristic penalty based on the distance between the car and the closest
                # pedestrian at the end of the trajectory. Entries 2:4 are presumably the
                # x/y positions -- confirm against the simulator's state layout.
                peds = np.asarray(info["peds"])
                car = np.asarray(info["car"])
                offsets = peds[:, 2:4] - car[2:4]
                heuristic_reward = np.min(np.linalg.norm(offsets, axis=1))
            else:
                # No heuristic
                heuristic_reward = 0
            return -self._HORIZON_PENALTY - self._HEURISTIC_WEIGHT * heuristic_reward

        # No crash or horizon yet
        return -self.mahalanobis_d(action)

    def mahalanobis_d(self, action):
        """Calculate the Mahalanobis distance [1]_ between the action and the mean action.

        The mean action is zero and the covariance matrix is diagonal, built by repeating
        ``[cov_x, cov_y, cov_sensor_noise, cov_sensor_noise, cov_sensor_noise, cov_sensor_noise]``
        once per pedestrian, so `action` must hold ``6 * num_peds`` entries.

        Parameters
        ----------
        action : array_like
            Action taken by the AST solver.

        Returns
        -------
        float
            The Mahalanobis distance between the action and the mean action.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the covariance matrix is singular (e.g. one of the covariances is zero).
        ValueError
            If the size of `action` does not match the covariance matrix.

        References
        ----------
        .. [1] Mahalanobis, Prasanta Chandra. "On the generalized distance in statistics." National Institute of
            Science of India, 1936.
            `<http://library.isical.ac.in:8080/jspui/bitstream/123456789/6765/1/Vol02_1936_1_Art05-pcm.pdf>`_
        """
        # Assemble the diagonal covariance matrix: one row of six entries per pedestrian,
        # flattened row-major onto the diagonal.
        per_ped_cov = [self.c_cov_x, self.c_cov_y] + [self.c_cov_sensor_noise] * 4
        cov = np.zeros((self.c_num_peds, 6))
        cov[:, :] = per_ped_cov
        big_cov = np.diagflat(cov)

        # The mean action is 0, so the deviation from the mean is the action itself.
        # Copy as float so integer-typed or list actions are handled and the caller's
        # array is never aliased.
        dif = np.array(action, dtype=float)

        # calculate the Mahalanobis distance
        dist = np.dot(np.dot(dif.T, np.linalg.inv(big_cov)), dif)

        return np.sqrt(dist)