Source code for colosseumrl.envs.tron.TronGridEnvironment

import numpy as np
from typing import Dict, Tuple, List
from dill import dumps, loads
from time import time

from colosseumrl.BaseEnvironment import BaseEnvironment
from .CyTronGrid import next_state_inplace, relative_player_inplace


[docs]def create_tron_config(*args) -> str:
    """ Convert a list of parameters into a serialized tron grid string.
    Parameters
    ----------
    args
        All of the arguments into tron grid as star args.

    Returns
    -------
    str
        Serialized string
    """
    raw_string = "{};" * len(args)
    return raw_string[:-1].format(*args)


[docs]def parse_tron_config(config: str) -> Tuple:
    """ Convert a serialized configuration string into the list of options to tron grid.

    Parameters
    ----------
    config : str
        Config string in the form "{};{};...;{}"

    Returns
    -------
    List
        A list of options into tron grid environment.

    """
    if len(config) == 0:
        return 20, 4, -1, False

    def parse(inp: str):
        try:
            return int(inp)
        except ValueError:
            return inp.lower() == "true"

    options = list(map(parse, config.split(";")))
    if len(options) == 1:
        options.append(4)
    if len(options) == 2:
        options.append(-1)
    if len(options) == 3:
        options.append(False)
    return options


[docs]class TronGridEnvironment(BaseEnvironment):
    STRING_TO_ACTION = {
        "": 0,
        "forward": 0,
        "right": 1,
        "left": -1,
    }

[docs]    @staticmethod
    def create(board_size: int = 20,
               num_players: int = 4,
               observation_window: int = -1,
               remove_on_death: bool = False) -> "TronGridEnvironment":
        """ Secondary constructor with explicit options for creating the environment

        Parameters
        ----------
        board_size : int
            This will specify the square size of the playing grid.
        num_players : int
            Number of active players in the game.
        observation_window : -1
            Current not used
        remove_on_death : bool
            Whether or not to remove the player and their associated walls when they are eliminated.
        """
        return TronGridEnvironment(create_tron_config(board_size,
                                                      num_players,
                                                      observation_window,
                                                      remove_on_death))

    def __init__(self, config: str = ""):
        """ Create the discrete tron environment.

        Parameters
        ----------
        config : str
            Serialized config string for specifying options for the environment.
            Use TronGridEnvironment.create for a more programming friendly way of initializing
            the environment.

        See Also
        --------
        colosseumrl.envs.tron.TronGridEnvironment.create
            A better constructor for the tron environment.
        """
        super().__init__(config)
        board_size, num_players, observation_window, remove_on_death = parse_tron_config(config)

        self.N = board_size
        self.num_players = num_players
        self.observation_window = observation_window
        self.fully_observable = observation_window < 0
        self.remove_on_death = remove_on_death

        self.player_array = np.arange(num_players)
        self.move_array = ['forward', 'right', 'left']
        self._moves = np.zeros(num_players, dtype=np.int64)

    def __repr__(self):
        output = ""
        output += "Tron Finite Grid Environment"
        output += "="*50
        output += "\tSize: {}x{}".format(self.N, self.N)
        output += "\tNumber of players: {}".format(self.num_players)
        output += "\tFully Observable: {}".format("Yes" if self.fully_observable else "No")
        output += "\tRemove old players: {}".format("Yes" if self.remove_on_death else "No")
        output += "-"*50
        return output

    def __str__(self):
        return self.__repr__()

    @property
    def min_players(self) -> int:
        """ Property holding the number of players present required to play game.

        Returns
        -------
        int
            The specified number of players in this game.
        """
        return self.num_players

    @property
    def max_players(self) -> int:
        """ Property holding the number of players present required to play game.

        Returns
        -------
        int
            The specified number of players in this game.
        """
        return self.num_players

[docs]    @staticmethod
    def observation_names() -> List[str]:
        """ Static method for returning the names of the observation objects.

        Returns
        -------
        List[str]
            The keys of the observation dictionary.
        """
        return ["board", "heads", "directions", "deaths"]

    @property
    def observation_shape(self) -> Dict[str, tuple]:
        """ Describe the fixed numpy shapes of each observation.

        Returns
        -------
        Dict[str, Tuple[int]]
            The shape, as a tuple, of each numpy array by their name.
        """
        return {
            "board": (self.N, self.N),
            "heads": (self.num_players, ),
            "directions": (self.num_players, ),
            "deaths": (self.num_players, )
        }

[docs]    def new_state(self, num_players: int = None) -> Tuple[object, List[int]]:
        """ Create an initial tron state.

        Parameters
        ----------
        num_players : int, optional.
            The number of players for the game.
            Note, this option gets ignored here in favor of the global player configuration when creating
            the environment.

        Returns
        -------
        State : Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
            The full state of the new tron environment.
        player_list : List[int]
            Which players are currently acting.
        """
        num_players = self.num_players if num_players is None else num_players
        assert num_players == self.num_players, "Do not change the number of players from the game configuration."

        # Generate the Starting configuration
        # TODO Make the starting points fair and spread out
        np.random.seed(int(time()))
        board = np.zeros((self.N, self.N), dtype=np.int64)
        heads = np.random.choice(self.N * self.N, size=self.num_players, replace=False)
        directions = np.random.randint(0, 4, size=num_players, dtype=np.int64)
        deaths = np.zeros(self.num_players, dtype=np.int64)

        # Set up the initial board
        board.ravel()[heads] = self.player_array + 1

        return (board, heads, directions, deaths), self.player_array

[docs]    def next_state(self, state: object, players: [int], actions: [str]):
        """ Compute a single step in the game.

        Notes
        -----
        Player numbers must be numbers in the set {0, 1, ..., n-1} for an n player game.

        Parameters
        ----------
        state : Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
            The current state of the game.
        players: [int]
            The players which are taking the given actions.
        actions : [str]
            The actions of each player.

        Returns
        -------
        new_state : object
            The new state of the game.
        new_players: List[int]
            List of players who's turn it is in the new state now.
        rewards : List[float]
            The reward for each player that acted.
        terminal : bool
            Whether or not the game has ended.
        winners: List[int]
            If the game has ended, who are the winners.
        """
        board, heads, directions, deaths = state

        # Convert the move strings to move indices for c++
        for player, action in zip(players, actions):
            self._moves[player] = self.STRING_TO_ACTION[action]

        # Make a copy of the state since we operate in-place
        new_board = np.copy(board)
        new_heads = np.copy(heads)
        new_directions = np.copy(directions)
        new_deaths = np.copy(deaths)

        # Execute the move
        next_state_inplace(new_board, new_heads, new_directions, new_deaths, self._moves)

        # Reduce players to the ones still alive
        new_players = np.where(new_deaths == 0)[0]

        # Make rewards be whether or not you lived or died
        rewards = -2 * (new_deaths > 0) + 1

        # Terminal is if everyone or everyone except one has died
        terminal = new_players.size <= 1

        # Winner is the final player or nobody if tie
        winners = new_players if terminal else None

        return (new_board, new_heads, new_directions, new_deaths), new_players, rewards, terminal, winners

[docs]    def valid_actions(self, state: object, player: int) -> [str]:
        """ Valid actions for a specific state.

        Parameters
        ----------
        state : object
            The current state of the game.
        player : int
            The player who is executing this action.

        Returns
        -------
        List[str]
            All possible actions for the game.
            For tron, this will always be ['forward', 'left', 'right']
        """
        return self.move_array

[docs]    def is_valid_action(self, state: object, player: int, action: str) -> bool:
        """ Whether or not an action is valid for a specific state.

        Parameters
        ----------
        state : object
            The current state of the game.
        player : int
            The player who is executing this action.
        action : str
            The action the player is executing.

        Returns
        -------
        bool
            Whether or not this is a valid action in the current state.
            This is always true for tron as every action is valid.
        """
        return True

[docs]    def state_to_observation(self, state: object, player: int) -> Dict[str, np.ndarray]:
        """ Convert the raw game state to the observation for the agent. Maps each observation name into an observation.

        Parameters
        ----------
        state : object
            The full server state of the game.
        player : int
            Which player is getting the observation.

        Returns
        -------
        Dict[str, np.ndarray]
            The observation dictionary with keys equal to the observation_names above.

        See Also
        --------
        colosseumrl.envs.tron.TronGridEnvironment.observation_names
            The list of observatio keys.
        colosseumrl.envs.tron.TronGridEnvironment.observation_shapes
            The sizes of each observation.
        """
        board, heads, directions, deaths = state

        # Adjust board to reflect relative player number
        # i.e. observing player always sees themselves as player 1
        observation = board.copy()
        relative_player_inplace(observation, self.num_players, player + 1)

        rolled_idx = (np.arange(self.num_players) + player) % self.num_players

        heads = heads[rolled_idx]
        deaths = deaths[rolled_idx]
        directions = directions[rolled_idx]

        # Fully observable
        if self.fully_observable:
            return {
                "board": observation,
                "heads": heads,
                "directions": directions,
                "deaths": deaths
            }

        # Partially Observable
        # TODO Make this work
        # TODO Make it so that you can see far ahead but only a bit to the side and back
        else:
            head = heads[0]
            headx = head % self.N
            heady = head // self.N
            delta = self.observation_window

            return {
                "board": observation[heady - delta:heady + delta, headx - delta:headx + delta],
                "heads": heads,
                "deaths": deaths
            }

[docs]    @staticmethod
    def serializable() -> bool:
        """ Whether or not this class supports serialization of the state.

        Returns
        -------
        bool
            False, Tron doesnt need to be serializable as the state is current fully observable.
        """
        return False

[docs]    @staticmethod
    def serialize_state(state: object) -> bytearray:
        """ Serialize a game state and convert it to a bytearray to be saved or sent over a network.

        Parameters
        ----------
        state : object
            The current game state.

        Returns
        -------
        bytearray
            Serialized byte-string for the state.
        """
        return dumps(state)

[docs]    @staticmethod
    def deserialize_state(serialized_state: bytearray) -> object:
        """ Convert a serialized bytearray back into a game state.

        Parameters
        ----------
        serialized_state : bytearray
            Serialized byte-string for the state.

        Returns
        -------
        object
            The current game state.
        """
        return loads(serialized_state)