import numpy as np
from typing import Dict, Tuple, List
from dill import dumps, loads
from time import time
from colosseumrl.BaseEnvironment import BaseEnvironment
from .CyTronGrid import next_state_inplace, relative_player_inplace
def create_tron_config(*args) -> str:
    """ Serialize tron grid options into a single ``;``-delimited string.

    Parameters
    ----------
    args
        The options for the tron grid, passed positionally as star args.
        Each one is rendered the way a bare ``{}`` format placeholder would render it.

    Returns
    -------
    str
        The rendered options joined by ``;`` with no trailing separator.
        Calling with no arguments yields the empty string.
    """
    # format(x) with no format spec produces exactly what "{}".format(x) does.
    return ";".join(format(option) for option in args)
def parse_tron_config(config: str) -> Tuple:
    """ Decode a serialized configuration string into the options for the tron grid.

    Parameters
    ----------
    config : str
        Config string in the form "{};{};...;{}".

    Returns
    -------
    Tuple or List
        The options in order. An empty ``config`` returns the tuple of defaults
        ``(20, 4, -1, False)``. Otherwise a list is returned in which every ``;``-separated
        field is parsed as an int when possible and as a boolean (``True`` only for a
        case-insensitive "true") when not. If fewer than four fields are supplied, the
        missing trailing positions are filled from the same defaults.
    """
    defaults = (20, 4, -1, False)

    # Nothing to parse: hand back the defaults untouched (as a tuple).
    if len(config) < 1:
        return defaults

    def to_option(token: str):
        # Integers take priority; any non-integer token is read as a boolean flag.
        try:
            return int(token)
        except ValueError:
            return token.lower() == "true"

    parsed = [to_option(token) for token in config.split(";")]

    # Top up with the defaults for whichever trailing positions were left out.
    # When four or more fields were given the slice is empty and nothing is added.
    parsed.extend(defaults[len(parsed):])
    return parsed
class TronGridEnvironment(BaseEnvironment):
    """ Multi-player tron environment played on a square ``N x N`` grid.

    The game state is a 4-tuple ``(board, heads, directions, deaths)`` of numpy arrays:
    ``board`` is the ``N x N`` grid, ``heads`` holds each player's position as a flat
    (row-major) index into the board, and ``directions`` / ``deaths`` hold one integer
    per player. The state transition itself is delegated to ``next_state_inplace``.
    """

    # Translation from action strings to the integer move codes written into the
    # move buffer that is handed to ``next_state_inplace``.
    STRING_TO_ACTION = {
        "": 0,
        "forward": 0,
        "right": 1,
        "left": -1,
    }

    @staticmethod
    def create(board_size: int = 20,
               num_players: int = 4,
               observation_window: int = -1,
               remove_on_death: bool = False) -> "TronGridEnvironment":
        """ Secondary constructor with explicit options for creating the environment.

        Parameters
        ----------
        board_size : int
            This will specify the square size of the playing grid.
        num_players : int
            Number of active players in the game.
        observation_window : int
            A negative value (the default) makes the game fully observable.
            A non-negative value selects the partially observable mode, where the board
            observation is cropped around the observing player's head. That mode is still
            marked as unfinished in ``state_to_observation``.
        remove_on_death : bool
            Whether or not to remove the player and their associated walls when they are eliminated.

        Returns
        -------
        TronGridEnvironment
            An environment built from the serialized form of these options.
        """
        return TronGridEnvironment(create_tron_config(board_size,
                                                      num_players,
                                                      observation_window,
                                                      remove_on_death))

    def __init__(self, config: str = ""):
        """ Create the discrete tron environment.

        Parameters
        ----------
        config : str
            Serialized config string for specifying options for the environment.
            Use TronGridEnvironment.create for a more programming friendly way of initializing
            the environment.

        See Also
        --------
        colosseumrl.envs.tron.TronGridEnvironment.create
            A better constructor for the tron environment.
        """
        super().__init__(config)

        board_size, num_players, observation_window, remove_on_death = parse_tron_config(config)

        self.N = board_size
        self.num_players = num_players
        self.observation_window = observation_window
        # Any negative window means "no cropping".
        self.fully_observable = observation_window < 0
        self.remove_on_death = remove_on_death

        self.player_array = np.arange(num_players)
        self.move_array = ['forward', 'right', 'left']

        # Reusable per-player move buffer passed to next_state_inplace on every step.
        self._moves = np.zeros(num_players, dtype=np.int64)

    def __repr__(self):
        # One entry per output line. The original concatenated these pieces with no
        # line breaks, which ran the header, separators and fields together.
        lines = [
            "Tron Finite Grid Environment",
            "=" * 50,
            "\tSize: {}x{}".format(self.N, self.N),
            "\tNumber of players: {}".format(self.num_players),
            "\tFully Observable: {}".format("Yes" if self.fully_observable else "No"),
            "\tRemove old players: {}".format("Yes" if self.remove_on_death else "No"),
            "-" * 50,
        ]
        return "\n".join(lines)

    def __str__(self):
        return self.__repr__()

    @property
    def min_players(self) -> int:
        """ Property holding the minimum number of players required to play the game.

        Returns
        -------
        int
            The specified number of players in this game.
        """
        return self.num_players

    @property
    def max_players(self) -> int:
        """ Property holding the maximum number of players allowed in the game.

        Returns
        -------
        int
            The specified number of players in this game (same value as ``min_players``).
        """
        return self.num_players

    @staticmethod
    def observation_names() -> List[str]:
        """ Static method for returning the names of the observation objects.

        Returns
        -------
        List[str]
            The keys of the observation dictionary.
        """
        return ["board", "heads", "directions", "deaths"]

    @property
    def observation_shape(self) -> Dict[str, tuple]:
        """ Describe the fixed numpy shapes of each observation.

        Notes
        -----
        The "board" shape given here is the full grid. In the partially observable mode
        ``state_to_observation`` returns a cropped board whose shape can be smaller.

        Returns
        -------
        Dict[str, Tuple[int]]
            The shape, as a tuple, of each numpy array by their name.
        """
        return {
            "board": (self.N, self.N),
            "heads": (self.num_players, ),
            "directions": (self.num_players, ),
            "deaths": (self.num_players, )
        }

    def new_state(self, num_players: int = None) -> Tuple[object, List[int]]:
        """ Create an initial tron state.

        Parameters
        ----------
        num_players : int, optional.
            The number of players for the game.
            Must match the player count the environment was configured with (or be omitted).

        Returns
        -------
        State : Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
            The full state of the new tron environment: ``(board, heads, directions, deaths)``.
        player_list : np.ndarray
            Which players are currently acting (all of them, ``0 .. num_players - 1``).
        """
        num_players = self.num_players if num_players is None else num_players
        assert num_players == self.num_players, "Do not change the number of players from the game configuration."

        # Generate the Starting configuration
        # TODO Make the starting points fair and spread out
        #
        # Draw from a private, freshly seeded generator instead of reseeding the global
        # NumPy RNG from the wall clock. Reseeding with int(time()) overwrote any seed the
        # caller had set and gave identical layouts to every game created within the same
        # second.
        rng = np.random.RandomState()

        board = np.zeros((self.N, self.N), dtype=np.int64)
        # Distinct flat indices into the board, one per player.
        heads = rng.choice(self.N * self.N, size=self.num_players, replace=False)
        directions = rng.randint(0, 4, size=num_players, dtype=np.int64)
        deaths = np.zeros(self.num_players, dtype=np.int64)

        # Set up the initial board: player i is marked on the grid with the value i + 1.
        board.ravel()[heads] = self.player_array + 1

        # Return a copy so callers cannot mutate the environment's own player index array.
        return (board, heads, directions, deaths), self.player_array.copy()

    def next_state(self, state: object, players: List[int], actions: List[str]):
        """ Compute a single step in the game.

        Notes
        -----
        Player numbers must be numbers in the set {0, 1, ..., n-1} for an n player game.
        The input state is not modified; the step is applied to copies of its arrays.

        Parameters
        ----------
        state : Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
            The current state of the game.
        players: [int]
            The players which are taking the given actions.
        actions : [str]
            The actions of each player. Each must be a key of ``STRING_TO_ACTION``.

        Returns
        -------
        new_state : object
            The new state of the game.
        new_players: np.ndarray
            Indices of the players whose ``deaths`` entry is still 0 after the step.
        rewards : np.ndarray
            One entry per player in the game: 1 if their ``deaths`` entry is 0, otherwise -1.
        terminal : bool
            Whether or not the game has ended (at most one player remains).
        winners: np.ndarray or None
            If the game has ended, the remaining players (possibly empty); otherwise None.

        Raises
        ------
        KeyError
            If an action string is not a key of ``STRING_TO_ACTION``.
        """
        board, heads, directions, deaths = state

        # Convert the move strings to move indices for the step routine.
        # NOTE(review): entries for players not listed in `players` keep whatever value
        # they had from the previous call, since the buffer is reused across steps.
        for player, action in zip(players, actions):
            self._moves[player] = self.STRING_TO_ACTION[action]

        # Make a copy of the state since we operate in-place
        new_board = np.copy(board)
        new_heads = np.copy(heads)
        new_directions = np.copy(directions)
        new_deaths = np.copy(deaths)

        # Execute the move
        next_state_inplace(new_board, new_heads, new_directions, new_deaths, self._moves)

        # Reduce players to the ones still alive
        new_players = np.where(new_deaths == 0)[0]

        # Make rewards be whether or not you lived or died: +1 alive, -1 dead
        rewards = -2 * (new_deaths > 0) + 1

        # Terminal is if everyone or everyone except one has died
        terminal = new_players.size <= 1

        # Winner is the final player or nobody if tie
        winners = new_players if terminal else None

        return (new_board, new_heads, new_directions, new_deaths), new_players, rewards, terminal, winners

    def valid_actions(self, state: object, player: int) -> List[str]:
        """ Valid actions for a specific state.

        Parameters
        ----------
        state : object
            The current state of the game.
        player : int
            The player who is executing this action.

        Returns
        -------
        List[str]
            All possible actions for the game.
            For tron, this will always be ['forward', 'right', 'left']
        """
        # Return a fresh list so callers cannot modify the environment's own action list.
        return list(self.move_array)

    def is_valid_action(self, state: object, player: int, action: str) -> bool:
        """ Whether or not an action is valid for a specific state.

        Parameters
        ----------
        state : object
            The current state of the game.
        player : int
            The player who is executing this action.
        action : str
            The action the player is executing.

        Returns
        -------
        bool
            Whether or not this is a valid action in the current state.
            This is always true for tron as every action is valid.
        """
        return True

    def state_to_observation(self, state: object, player: int) -> Dict[str, np.ndarray]:
        """ Convert the raw game state to the observation for the agent. Maps each observation name into an observation.

        Parameters
        ----------
        state : object
            The full server state of the game.
        player : int
            Which player is getting the observation.

        Returns
        -------
        Dict[str, np.ndarray]
            The observation dictionary with keys equal to the observation_names above.
            The per-player arrays are rotated so that index 0 belongs to the observing player.

        See Also
        --------
        colosseumrl.envs.tron.TronGridEnvironment.observation_names
            The list of observation keys.
        colosseumrl.envs.tron.TronGridEnvironment.observation_shape
            The sizes of each observation.
        """
        board, heads, directions, deaths = state

        # Adjust board to reflect relative player number
        # i.e. observing player always sees themselves as player 1
        observation = board.copy()
        relative_player_inplace(observation, self.num_players, player + 1)

        # Rotate the per-player arrays so the observing player comes first.
        rolled_idx = (np.arange(self.num_players) + player) % self.num_players
        heads = heads[rolled_idx]
        deaths = deaths[rolled_idx]
        directions = directions[rolled_idx]

        # Fully observable
        if self.fully_observable:
            return {
                "board": observation,
                "heads": heads,
                "directions": directions,
                "deaths": deaths
            }

        # Partially Observable
        # TODO Make this work
        # TODO Make it so that you can see far ahead but only a bit to the side and back
        head = heads[0]
        # Heads are flat row-major indices: column is the remainder, row is the quotient.
        headx = head % self.N
        heady = head // self.N
        delta = self.observation_window

        # Clamp the lower bounds at 0. A negative slice start would wrap around to the far
        # side of the board and give an empty or wrong window near the top/left edges.
        # Upper bounds past the edge are already truncated by numpy slicing.
        # NOTE(review): the window spans [head - delta, head + delta), so it covers one cell
        # fewer past the head than before it - confirm whether that asymmetry is intended.
        top = max(heady - delta, 0)
        left = max(headx - delta, 0)

        return {
            "board": observation[top:heady + delta, left:headx + delta],
            "heads": heads,
            # Included so the keys match observation_names() in both observation modes.
            "directions": directions,
            "deaths": deaths
        }

    @staticmethod
    def serializable() -> bool:
        """ Whether or not this class supports serialization of the state.

        Returns
        -------
        bool
            False, Tron doesn't need to be serializable as the state is currently fully observable.
        """
        return False

    @staticmethod
    def serialize_state(state: object) -> bytearray:
        """ Serialize a game state and convert it to a bytearray to be saved or sent over a network.

        Parameters
        ----------
        state : object
            The current game state.

        Returns
        -------
        bytearray
            Serialized byte-string for the state, as produced by ``dill.dumps``.
        """
        return dumps(state)

    @staticmethod
    def deserialize_state(serialized_state: bytearray) -> object:
        """ Convert a serialized bytearray back into a game state.

        Parameters
        ----------
        serialized_state : bytearray
            Serialized byte-string for the state.

        Returns
        -------
        object
            The current game state.
        """
        # SECURITY: dill.loads, like pickle.loads, can execute arbitrary code embedded in
        # the payload. Only pass bytes that come from a trusted source.
        return loads(serialized_state)