game: deep_sea

GameType.chance_mode = ChanceMode.DETERMINISTIC
GameType.dynamics = Dynamics.SEQUENTIAL
GameType.information = Information.IMPERFECT_INFORMATION
GameType.long_name = "DeepSea"
GameType.max_num_players = 1
GameType.min_num_players = 1
GameType.parameter_specification = ["randomize_actions", "seed", "size", "unscaled_move_cost"]
GameType.provides_information_state_string = False
GameType.provides_information_state_tensor = False
GameType.provides_observation_string = True
GameType.provides_observation_tensor = True
GameType.provides_factored_observation_string = False
GameType.reward_model = RewardModel.REWARDS
GameType.short_name = "deep_sea"
GameType.utility = Utility.GENERAL_SUM

NumDistinctActions() = 2
PolicyTensorShape() = [2]
MaxChanceOutcomes() = 2
GetParameters() = {randomize_actions=True,seed=42,size=5,unscaled_move_cost=0.01}
NumPlayers() = 1
MinUtility() = -0.01
MaxUtility() = 0.99
UtilitySum() = None
ObservationTensorShape() = [5, 5]
ObservationTensorLayout() = TensorLayout.CHW
ObservationTensorSize() = 25
MaxGameLength() = 5
ToString() = "deep_sea()"

# State 0
# x.....
# RR....
# RRL...
# RLRL..
# RLLRL.
# ......
IsTerminal() = False
History() = []
HistoryString() = ""
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = "x............................."
ObservationTensor(0): ◉◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
Rewards() = [0.0]
Returns() = [0.0]
LegalActions() = [0, 1]
StringLegalActions() = ["LEFT", "RIGHT"]

# Apply action "LEFT"
action: 0

# State 1
# R.....
# xR....
# RRL...
# RLRL..
# RLLRL.
# ......
IsTerminal() = False
History() = [0]
HistoryString() = "0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = ".....x........................"
ObservationTensor(0): ◯◯◯◯◯
                      ◉◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
Rewards() = [0.0]
Returns() = [0.0]
LegalActions() = [0, 1]
StringLegalActions() = ["LEFT", "RIGHT"]

# Apply action "RIGHT"
action: 1

# State 2
# R.....
# RR....
# RxL...
# RLRL..
# RLLRL.
# ......
IsTerminal() = False
History() = [0, 1]
HistoryString() = "0, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = "...........x.................."
ObservationTensor(0): ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◉◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
Rewards() = [-0.002]
Returns() = [-0.002]
LegalActions() = [0, 1]
StringLegalActions() = ["LEFT", "RIGHT"]

# Apply action "LEFT"
action: 0

# State 3
# R.....
# RR....
# RRL...
# xLRL..
# RLLRL.
# ......
IsTerminal() = False
History() = [0, 1, 0]
HistoryString() = "0, 1, 0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = "...............x.............."
ObservationTensor(0): ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◉◯◯◯◯
                      ◯◯◯◯◯
Rewards() = [0.0]
Returns() = [-0.002]
LegalActions() = [0, 1]
StringLegalActions() = ["LEFT", "RIGHT"]

# Apply action "RIGHT"
action: 1

# State 4
# R.....
# RR....
# RRL...
# RLRL..
# RxLRL.
# ......
IsTerminal() = False
History() = [0, 1, 0, 1]
HistoryString() = "0, 1, 0, 1"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = 0
ObservationString(0) = ".....................x........"
ObservationTensor(0): ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◉◯◯◯
Rewards() = [-0.002]
Returns() = [-0.004]
LegalActions() = [0, 1]
StringLegalActions() = ["LEFT", "RIGHT"]

# Apply action "LEFT"
action: 0

# State 5
# R.....
# RR....
# RRL...
# RLRL..
# RLLRL.
# ..x...
IsTerminal() = True
History() = [0, 1, 0, 1, 0]
HistoryString() = "0, 1, 0, 1, 0"
IsChanceNode() = False
IsSimultaneousNode() = False
CurrentPlayer() = -4
ObservationString(0) = "...........................x.."
ObservationTensor(0): ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
                      ◯◯◯◯◯
Rewards() = [-0.002]
Returns() = [-0.006]
