
### Importing Libraries

```python
import numpy as np
import random
```

- `numpy` is imported as `np`, though it is not used in the provided code.


- `random` supplies the random choices used for exploration and tie-breaking in the agent's move selection.

### TicTacToeGame Class

This class encapsulates the Tic-Tac-Toe game logic.

#### `__init__` Method

```python
class TicTacToeGame():
    def __init__(self):
        self.state = ' ' * 9  # a string of length 9 that encodes the state of the 3x3 board
        self.player = 'X'
        self.winner = None
```

- Initializes the game state, the current player (`'X'`), and the winner (initially `None`).
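
A fresh game can be inspected directly:

```python
game = TicTacToeGame()
print(repr(game.state))  # '         ' (nine spaces: an empty board)
print(game.player)       # 'X' always moves first
print(game.winner)       # None until someone completes a line
```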

#### `allowed_moves` Method

```python
    def allowed_moves(self):
        states = []  # store all possible next states
        for i in range(len(self.state)):
            if self.state[i] == ' ':
                states.append(self.state[:i] + self.player + self.state[i+1:])
        return states
```

- Returns a list of all possible next states by filling empty spaces with the current player's
symbol.
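
From the empty opening position this yields nine candidate states, one per empty cell:

```python
game = TicTacToeGame()
moves = game.allowed_moves()
print(len(moves))      # 9
print(repr(moves[0]))  # 'X        ' (X placed in cell 0)
```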

#### `make_move` Method

```python
    def make_move(self, next_state):
        if self.winner:
            raise Exception("Game already completed, cannot make another move!")
        if not self.__valid_move(next_state):
            raise Exception("Cannot make move {} to {} for player {}".format(
                self.state, next_state, self.player))

        self.state = next_state
        self.winner = self.predict_winner(self.state)
        if self.winner:
            self.player = None
        elif self.player == 'X':
            self.player = 'O'
        else:
            self.player = 'X'
```

- Raises an exception if the game is already over or if `next_state` is not a legal successor of the current state.
- Otherwise applies `next_state`, checks for a winner, and switches the current player (setting it to `None` once the game is won).
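
A minimal example of a legal first move:

```python
game = TicTacToeGame()
game.make_move('X' + ' ' * 8)  # X takes the top-left cell
print(game.player)             # 'O', the turn has passed
print(game.winner)             # None, no line is complete yet
```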

#### `playable` Method

```python
    def playable(self):
        return (not self.winner) and any(self.allowed_moves())
```

- Returns `True` if the game is still ongoing and there are moves available.

#### `predict_winner` Method

```python
    def predict_winner(self, state):
        # the eight winning lines: three rows, three columns, two diagonals
        lines = [(0,1,2), (3,4,5), (6,7,8), (0,3,6), (1,4,7), (2,5,8), (0,4,8), (2,4,6)]
        winner = None
        for line in lines:
            line_state = state[line[0]] + state[line[1]] + state[line[2]]
            if line_state == 'XXX':
                winner = 'X'
            elif line_state == 'OOO':
                winner = 'O'
        return winner
```

- Checks all eight winning lines and returns the winner (`'X'` or `'O'`), or `None` if neither player has completed a line.
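
For instance:

```python
game = TicTacToeGame()
print(game.predict_winner('XXXOO    '))  # 'X'  (top row)
print(game.predict_winner('XO XO X  '))  # 'X'  (left column)
print(game.predict_winner('XOXOX O  '))  # None (no complete line yet)
```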

#### `__valid_move` Method

```python
    def __valid_move(self, next_state):
        return next_state in self.allowed_moves()
```

- Checks if a given `next_state` is a valid move.

#### `print_board` Method

```python
    def print_board(self):
        s = self.state
        print(' {} | {} | {} '.format(s[0], s[1], s[2]))
        print(' -----------')
        print(' {} | {} | {} '.format(s[3], s[4], s[5]))
        print(' -----------')
        print(' {} | {} | {} '.format(s[6], s[7], s[8]))
```

- Prints the current game board.
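
For a state such as `'XO  X   O'` the output is:

```
 X | O |
 -----------
   | X |
 -----------
   |   | O
```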

### Agent Class

This class represents an AI agent that learns to play Tic-Tac-Toe using reinforcement
learning.

#### `__init__` Method

```python
class Agent():
    def __init__(self, game_class, epsilon=0.1, alpha=0.5, value_player='X'):
        self.V = dict()  # maps game states to learned value estimates
        self.NewGame = game_class
        self.epsilon = epsilon
        self.alpha = alpha
        self.value_player = value_player
```

- Initializes the agent with a value dictionary `V`, game class, exploration rate `epsilon`,
learning rate `alpha`, and the player the agent values (`'X'` or `'O'`).

#### `state_value` Method

```python
    def state_value(self, game_state):
        return self.V.get(game_state, 0.0)
```

- Returns the learned value of a given game state, defaulting to `0.0` for states not yet visited.


#### `learn_game` Method

```python
    def learn_game(self, num_episodes=1000):
        for episode in range(num_episodes):
            self.learn_from_episode()
```

- Trains the agent by playing a specified number of episodes.
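
Typical training usage (the episode count here is an illustrative choice, not a value from the source):

```python
agent = Agent(TicTacToeGame, epsilon=0.1, alpha=0.5, value_player='X')
agent.learn_game(num_episodes=30000)  # more episodes generally improve the value estimates
```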

#### `learn_from_episode` Method

```python
    def learn_from_episode(self):
        game = self.NewGame()
        _, move = self.learn_select_move(game)
        while move:
            move = self.learn_from_move(game, move)
```

- Runs a learning episode by repeatedly selecting and making moves.

#### `learn_from_move` Method

```python
    def learn_from_move(self, game, move):
        game.make_move(move)
        r = self.__reward(game)
        td_target = r
        next_state_value = 0.0
        selected_next_move = None
        if game.playable():
            best_next_move, selected_next_move = self.learn_select_move(game)
            next_state_value = self.state_value(best_next_move)
        current_state_value = self.state_value(move)
        td_target = r + next_state_value
        # TD(0) update: move the value toward the target by a fraction alpha of the error
        self.V[move] = current_state_value + self.alpha * (td_target - current_state_value)
        return selected_next_move
```

- Performs a temporal-difference update: the value of the state just reached is moved toward `reward + value(best next state)`, as spelled out below.
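
The update line implements the one-step TD(0) rule

`V(s) <- V(s) + alpha * (r + V(s') - V(s))`

where `s` is the state just reached and `s'` is the best allowed next state. The `__reward` helper called at the top of the method is not defined in this excerpt; a minimal sketch, assuming a reward of +1 when `value_player` wins, -1 when the opponent wins, and 0 otherwise:

```python
    def __reward(self, game):
        # Assumed reward scheme; the original definition is not shown here.
        if game.winner == self.value_player:
            return 1.0
        elif game.winner:
            return -1.0
        return 0.0
```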

#### `learn_select_move` Method

```python
    def learn_select_move(self, game):
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            best_move = self.__argmax_V(allowed_state_values)
        else:
            best_move = self.__argmin_V(allowed_state_values)

        selected_move = best_move
        if random.random() < self.epsilon:
            selected_move = self.__random_V(allowed_state_values)

        return (best_move, selected_move)
```

- Selects a move ε-greedily: with probability `epsilon` a random allowed state is chosen (exploration); otherwise the highest-value state is chosen on `value_player`'s turn and the lowest-value state on the opponent's turn. Both the greedy move and the actually selected move are returned, so the TD target can be computed from the greedy value even when an exploratory move is played.

#### `play_select_move` Method

```python
    def play_select_move(self, game):
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            return self.__argmax_V(allowed_state_values)
        else:
            return self.__random_V(allowed_state_values)
```

- Selects the move to play during a demonstration game: the valued player moves greedily (no exploration), while its opponent moves at random.

#### `demo_game` Method

```python
    def demo_game(self, verbose=False):
        game = self.NewGame()
        t = 0
        while game.playable():
            if verbose:
                print(" \nTurn {}\n".format(t))
                game.print_board()
            move = self.play_select_move(game)
            game.make_move(move)
            t += 1
        if verbose:
            print(" \nTurn {}\n".format(t))
            game.print_board()
        if game.winner:
            if verbose:
                print("\n{} is the winner!".format(game.winner))
            return game.winner
        else:
            if verbose:
                print("\nIt's a draw!")
            return '-'
```

- Demonstrates a game played by the agent and optionally prints the game progress.
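
Assuming the trained `agent` from the earlier snippet, the outcome distribution can be estimated by repeated demo games:

```python
results = [agent.demo_game() for _ in range(1000)]
print({outcome: results.count(outcome) for outcome in ('X', 'O', '-')})
```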

#### `interactive_game` Method

```python
    def interactive_game(self, agent_player='X'):
        game = self.NewGame()
        t = 0
        while game.playable():
            print(" \nTurn {}\n".format(t))
            game.print_board()
            if game.player == agent_player:
                move = self.play_select_move(game)
                game.make_move(move)
            else:
                move = self.__request_human_move(game)
                game.make_move(move)
            t += 1

        print(" \nTurn {}\n".format(t))
        game.print_board()

        if game.winner:
            print("\n{} is the winner!".format(game.winner))
            return game.winner
        print("\nIt's a draw!")
        return '-'
```

- Allows a human to play against the agent, printing the board each turn. It relies on a `__request_human_move` helper that is not shown in this excerpt; a sketch follows below.
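
A minimal sketch of the missing helper, assuming the human enters a cell number from 1 to 9 (this is an assumed implementation, placed inside the `Agent` class):

```python
    def __request_human_move(self, game):
        # Assumed helper: prompt until the human picks an empty cell (1-9),
        # then build the corresponding next-state string.
        allowed = [i + 1 for i in range(9) if game.state[i] == ' ']
        while True:
            choice = int(input("Choose a cell for {} from {}: ".format(game.player, allowed)))
            if choice in allowed:
                return game.state[:choice - 1] + game.player + game.state[choice:]
```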

#### `round_V` Method

```python
    def round_V(self):
        for k in self.V.keys():
            self.V[k] = round(self.V[k], 1)
```

- Rounds the values in the value dictionary to one decimal place.

#### Private Methods


```python
    def __state_values(self, game_states):
        return dict((state, self.state_value(state)) for state in game_states)

    def __argmax_V(self, state_values):
        max_V = max(state_values.values())
        chosen_state = random.choice([state for state, v in state_values.items() if v == max_V])
        return chosen_state

    def __argmin_V(self, state_values):
        min_V = min(state_values.values())
        chosen_state = random.choice([state for state, v in state_values.items() if v == min_V])
        return chosen_state

    def __random_V(self, state_values):
        # Truncated in the source; reconstructed from its call sites:
        # returns a uniformly random candidate state.
        return random.choice(list(state_values.keys()))
```
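
- `__state_values` maps each candidate next state to its current value estimate.
- `__argmax_V` / `__argmin_V` return the state with the highest/lowest value, breaking ties uniformly at random.
- `__random_V` returns a uniformly random candidate state; its body is cut off in the source, so the one-liner above is a reconstruction consistent with how it is called.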
