The document outlines a Python implementation of a Snake game using Q-learning for reinforcement learning. It defines the game environment, including the snake's movement, food placement, and collision detection, as well as a Q-learning agent that learns to play the game through exploration and exploitation. The agent uses a neural network model to predict Q-values for actions based on the game state.
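
In Q-learning terms, the agent's replay step nudges the network's estimate for the chosen action toward the standard one-step target

    target = reward + gamma * max_a' Q(next_state, a')

where gamma is the discount factor (0.9 in the code below) and the maximum runs over the four possible moves; when a transition ends the game, the target is just the reward.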


import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers
import pygame
from enum import Enum
from collections import namedtuple

# Game setup: directions, grid points, and display constants
class Direction(Enum):
    RIGHT = 1
    LEFT = 2
    UP = 3
    DOWN = 4

Point = namedtuple('Point', 'x, y')

BLOCK_SIZE = 20
SPEED = 10
WHITE = (255, 255, 255)
RED = (200, 0, 0)
BLUE1 = (0, 0, 255)
BLUE2 = (0, 100, 255)
BLACK = (0, 0, 0)

class SnakeGame:
    def __init__(self, w=640, h=480):
        self.w = w
        self.h = h
        # init display (pygame must be initialised before set_mode)
        pygame.init()
        self.display = pygame.display.set_mode((self.w, self.h))
        pygame.display.set_caption('Snake')
        self.clock = pygame.time.Clock()

        # init game state
        self.direction = Direction.RIGHT
        self.head = Point(self.w / 2, self.h / 2)
        self.snake = [self.head,
                      Point(self.head.x - BLOCK_SIZE, self.head.y),
                      Point(self.head.x - (2 * BLOCK_SIZE), self.head.y)]
        self.score = 0
        self.food = None
        self._place_food()

    def _place_food(self):
        x = random.randint(0, (self.w - BLOCK_SIZE) // BLOCK_SIZE) * BLOCK_SIZE
        y = random.randint(0, (self.h - BLOCK_SIZE) // BLOCK_SIZE) * BLOCK_SIZE
        self.food = Point(x, y)
        if self.food in self.snake:
            self._place_food()

    def reset(self):
        self.direction = Direction.RIGHT
        self.head = Point(self.w / 2, self.h / 2)
        self.snake = [self.head,
                      Point(self.head.x - BLOCK_SIZE, self.head.y),
                      Point(self.head.x - (2 * BLOCK_SIZE), self.head.y)]
        self.score = 0
        self._place_food()
        return self.get_state()

    def get_state(self):
        # A simple state representation: head position, food position, direction
        return (self.head.x, self.head.y, self.food.x, self.food.y, self.direction.value)

    def take_action(self, action):
        if action == Direction.RIGHT:
            self.direction = Direction.RIGHT
        elif action == Direction.LEFT:
            self.direction = Direction.LEFT
        elif action == Direction.UP:
            self.direction = Direction.UP
        elif action == Direction.DOWN:
            self.direction = Direction.DOWN
        self._move(self.direction)

    def _move(self, direction):
        x = self.head.x
        y = self.head.y
        if direction == Direction.RIGHT:
            x += BLOCK_SIZE
        elif direction == Direction.LEFT:
            x -= BLOCK_SIZE
        elif direction == Direction.DOWN:
            y += BLOCK_SIZE
        elif direction == Direction.UP:
            y -= BLOCK_SIZE
        self.head = Point(x, y)
        # Grow the body at the new head position; step() pops the tail
        # unless food was eaten, so the length stays consistent.
        self.snake.insert(0, self.head)

    def step(self):
        # Calculate reward and check for game over
        if self._is_collision():
            return self.get_state(), -10, True   # Game over
        elif self.head == self.food:
            self.score += 1
            self._place_food()
            return self.get_state(), 10, False   # Reward for eating food
        else:
            self.snake.pop()
            return self.get_state(), -1, False   # Small negative reward to encourage movement

    def _is_collision(self):
        # Check if the snake hits the wall or itself
        if (self.head.x > self.w - BLOCK_SIZE or self.head.x < 0 or
                self.head.y > self.h - BLOCK_SIZE or self.head.y < 0):
            return True
        if self.head in self.snake[1:]:
            return True
        return False

class QLearningAgent:
    def __init__(self, action_space):
        self.action_space = action_space
        self.epsilon = 1.0            # Exploration rate
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.995
        self.gamma = 0.9              # Discount factor
        self.learning_rate = 0.001    # Must be set before build_model() uses it
        self.model = self.build_model()

    def build_model(self):
        model = tf.keras.Sequential([
            layers.Dense(24, input_dim=5, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(4, activation='linear')  # 4 possible actions
        ])
        model.compile(loss='mse',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def act(self, state):
        # Epsilon-greedy selection; returns an action index (0..3) in both branches
        if np.random.rand() <= self.epsilon:
            return random.randrange(len(self.action_space))  # Exploration
        state = np.array(state).reshape(1, -1)
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))  # Exploitation

    def replay(self, state, action, reward, next_state, done):
        # One-step Q-learning target: r + gamma * max_a' Q(next_state, a')
        target = reward
        if not done:
            next_state = np.array(next_state).reshape(1, -1)
            target = reward + self.gamma * np.max(self.model.predict(next_state, verbose=0)[0])

        state = np.array(state).reshape(1, -1)
        target_f = self.model.predict(state, verbose=0)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)

        # Decay exploration over time
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
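
The listing stops after the agent definition, so a driver loop is not shown. A minimal sketch of how the two classes could be wired together follows; the episode count, per-episode step cap, and the mapping from an action index back to a Direction are illustrative assumptions, not part of the original code.

# Hypothetical training driver (not in the original listing).
# Episode count, step cap, and the index -> Direction mapping are assumptions.
if __name__ == '__main__':
    env = SnakeGame()
    agent = QLearningAgent(action_space=list(Direction))

    for episode in range(200):               # assumed number of episodes
        state = env.reset()
        done = False
        steps = 0
        while not done and steps < 500:      # assumed per-episode step cap
            action_idx = agent.act(state)                     # index 0..3
            env.take_action(list(Direction)[action_idx])      # map index to Direction
            next_state, reward, done = env.step()
            agent.replay(state, action_idx, reward, next_state, done)
            state = next_state
            steps += 1
        print(f'Episode {episode}: score={env.score}, epsilon={agent.epsilon:.3f}')

    pygame.quit()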
