Technology
```python
import numpy as np
import random
```
```python
class TicTacToeGame():
    def __init__(self):
        self.state = ' ' * 9  # a string of length 9 that encodes the state of the 3*3 board
        self.player = 'X'
        self.winner = None
```
- Initializes the game state, the current player (`'X'`), and the winner (initially `None`).
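  As a quick illustration (not part of the original listing), a freshly constructed game looks like this:

```python
game = TicTacToeGame()
print(repr(game.state))   # '         '  (nine blank cells)
print(game.player)        # 'X' -- X always moves first
print(game.winner)        # None -- no winner yet
```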
```python
    def allowed_moves(self):
        states = []  # store all possible next states
        for i in range(len(self.state)):
            if self.state[i] == ' ':
                states.append(self.state[:i] + self.player + self.state[i+1:])
        return states
```
- Returns a list of all possible next states by filling empty spaces with the current player's
symbol.
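  For instance, on a board where X holds cell 0 and O holds cell 1, `allowed_moves` produces one candidate state per remaining empty cell (an illustrative snippet, not from the original write-up):

```python
game = TicTacToeGame()
game.state = 'XO       '   # X in cell 0, O in cell 1, seven empty cells
game.player = 'X'
for s in game.allowed_moves():
    print(repr(s))
# 'XOX      ', 'XO X     ', 'XO  X    ', ... seven candidate states in total
```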
```python
    def make_move(self, next_state):
        if self.winner:
            raise(Exception("Game already completed, cannot make another move!"))
        if not self.__valid_move(next_state):
            raise(Exception("Cannot make move {} to {} for player {}".format(
                self.state, next_state, self.player)))
        self.state = next_state
        self.winner = self.predict_winner(self.state)
        if self.winner:
            self.player = None
        elif self.player == 'X':
            self.player = 'O'
        else:
            self.player = 'X'
```
```python
    def playable(self):
        return ( (not self.winner) and any(self.allowed_moves()) )
```
- Returns `True` if the game is still ongoing and there are moves available.
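  A small illustration of the two ways a game stops being playable (again, not from the original listing):

```python
game = TicTacToeGame()
print(game.playable())    # True: no winner and empty cells remain
game.state = 'XXOOXXXOO'  # a completely filled, drawn board
print(game.playable())    # False: no empty cells left to move into
```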
```python
    def predict_winner(self, state):
        lines = [(0,1,2), (3,4,5), (6,7,8), (0,3,6), (1,4,7), (2,5,8), (0,4,8), (2,4,6)]
        winner = None
        for line in lines:
            line_state = state[line[0]] + state[line[1]] + state[line[2]]
            if line_state == 'XXX':
                winner = 'X'
            elif line_state == 'OOO':
                winner = 'O'
        return winner
```
- Checks all possible winning combinations and returns the winner (`'X'` or `'O'`).
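  For example (illustrative calls, not part of the original write-up):

```python
game = TicTacToeGame()
print(game.predict_winner('XXXOO    '))   # 'X'  -- top row (0, 1, 2)
print(game.predict_winner('XO XO X  '))   # 'X'  -- left column (0, 3, 6)
print(game.predict_winner('XO       '))   # None -- no completed line yet
```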
```python
    def __valid_move(self, next_state):
        allowed_moves = self.allowed_moves()
        if any(state == next_state for state in allowed_moves):
            return True
        return False
```
```python
    def print_board(self):
        s = self.state
        print(' {} | {} | {} '.format(s[0],s[1],s[2]))
        print(' -----------')
        print(' {} | {} | {} '.format(s[3],s[4],s[5]))
        print(' -----------')
        print(' {} | {} | {} '.format(s[6],s[7],s[8]))
```
The `Agent` class below represents an AI player that learns to play Tic-Tac-Toe by reinforcement
learning: it keeps a value estimate for each board state and refines those estimates through self-play.
```python
class Agent():
    def __init__(self, game_class, epsilon=0.1, alpha=0.5, value_player='X'):
        self.V = dict()  # dictionary to store state values
        self.NewGame = game_class
        self.epsilon = epsilon
        self.alpha = alpha
        self.value_player = value_player
```
- Initializes the agent with a value dictionary `V`, game class, exploration rate `epsilon`,
learning rate `alpha`, and the player the agent values (`'X'` or `'O'`).
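  Constructing an agent is then a one-liner; the defaults below simply mirror the signature above (the example itself is only illustrative):

```python
# epsilon: fraction of learning moves that explore at random
# alpha:   step size of each value update
agent = Agent(TicTacToeGame, epsilon=0.1, alpha=0.5, value_player='X')
```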
```python
    def state_value(self, game_state):
        return self.V.get(game_state, 0.0)
```
```python
    def learn_game(self, num_episodes=1000):
        for episode in range(num_episodes):
            self.learn_from_episode()
```
```python
    def learn_from_episode(self):
        game = self.NewGame()
        _, move = self.learn_select_move(game)
        while move:
            move = self.learn_from_move(game, move)
```
```python
    def learn_from_move(self, game, move):
        game.make_move(move)
        r = self.__reward(game)
        td_target = r
        next_state_value = 0.0
        selected_next_move = None
        if game.playable():
            best_next_move, selected_next_move = self.learn_select_move(game)
            next_state_value = self.state_value(best_next_move)
        current_state_value = self.state_value(move)
        td_target = r + next_state_value
        self.V[move] = current_state_value + self.alpha * (td_target - current_state_value)
        return selected_next_move
```
- Updates the value of the current state based on the reward and the value of the next state.
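  This is a TD(0) update, V(s) ← V(s) + α·(r + V(s') − V(s)), where s is the state reached by `move`, r is the reward, and V(s') is the value of the best next state. A small numeric illustration with made-up values:

```python
# Hypothetical numbers, purely to show the arithmetic of the update:
alpha = 0.5
current_state_value = 0.2   # V(s): value of the state just reached
next_state_value = 0.6      # V(s'): value of the best reachable next state
r = 0.0                     # no reward yet, the game is still in progress
td_target = r + next_state_value
print(current_state_value + alpha * (td_target - current_state_value))  # 0.4
```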
```python
    def learn_select_move(self, game):
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            best_move = self.__argmax_V(allowed_state_values)
        else:
            best_move = self.__argmin_V(allowed_state_values)
        selected_move = best_move
        if random.random() < self.epsilon:
            selected_move = self.__random_V(allowed_state_values)
        return (best_move, selected_move)
```
- Picks the greedy (best-valued) move when it is the agent's turn and the worst-valued move for the opponent, but with probability `epsilon` substitutes a random exploratory move; both the greedy and the selected move are returned, so the value update can bootstrap from the greedy move while play continues with the selected one.
```python
    def play_select_move(self, game):
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            return self.__argmax_V(allowed_state_values)
        else:
            return self.__random_V(allowed_state_values)
```
```python
    def demo_game(self, verbose=False):
        game = self.NewGame()
        t = 0
        while game.playable():
            if verbose:
                print(" \nTurn {}\n".format(t))
                game.print_board()
            move = self.play_select_move(game)
            game.make_move(move)
            t += 1
        if verbose:
            print(" \nTurn {}\n".format(t))
            game.print_board()
        if game.winner:
            if verbose:
                print("\n{} is the winner!".format(game.winner))
            return game.winner
        else:
            if verbose:
                print("\nIt's a draw!")
            return '-'
```
- Demonstrates a game played by the agent and optionally prints the game progress.
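  Putting it together, typical usage might look like the snippet below (episode counts are illustrative, and it assumes the private helpers not shown in this section, such as `__state_values`, `__argmax_V`, `__argmin_V`, `__random_V`, and `__reward`, are defined as in the full listing):

```python
agent = Agent(TicTacToeGame, epsilon=0.1, alpha=0.5, value_player='X')
agent.learn_game(num_episodes=10000)   # train by self-play

# Play 100 demonstration games and tally the outcomes
results = [agent.demo_game() for _ in range(100)]
print("X wins:", results.count('X'),
      "O wins:", results.count('O'),
      "draws:", results.count('-'))
```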
```python
    def interactive_game(self, agent_player='X'):
        game = self.NewGame()
        t = 0
        while game.playable():
            print(" \nTurn {}\n".format(t))
            game.print_board()
            if game.player == agent_player:
                move = self.play_select_move(game)
                game.make_move(move)
            else:
                move = self.__request_human_move(game)
                game.make_move(move)
            t += 1
        if game.winner:
            print("\n{} is the winner!".format(game.winner))
            return game.winner
        print("\nIt's a draw!")
        return '-'
```
```python
    def round_V(self):
        for k in self.V.keys():
            self.V[k] = round(self.V[k], 1)
```
def __random_V