This document walks through a Python implementation of the Snake game trained with Q-learning. It defines the game environment, including the snake's movement, food placement, and collision detection, along with a Q-learning agent that learns to play through exploration and exploitation, using a small neural network to predict Q-values for each action given the game state.


import random
from collections import namedtuple
from enum import Enum

import numpy as np
import pygame
import tensorflow as tf
from tensorflow.keras import layers

# Q-learning setup
class Direction(Enum):
    RIGHT = 1
    LEFT = 2
    UP = 3
    DOWN = 4

Point = namedtuple('Point', 'x, y')

BLOCK_SIZE = 20
SPEED = 10
WHITE = (255, 255, 255)
RED = (200, 0, 0)
BLUE1 = (0, 0, 255)
BLUE2 = (0, 100, 255)
BLACK = (0, 0, 0)

class SnakeGame:
    def __init__(self, w=640, h=480):
        self.w = w
        self.h = h
        # init display (pygame must be initialized before set_mode)
        pygame.init()
        self.display = pygame.display.set_mode((self.w, self.h))
        pygame.display.set_caption('Snake')
        self.clock = pygame.time.Clock()

        # init game state
        self.direction = Direction.RIGHT
        self.head = Point(self.w / 2, self.h / 2)
        self.snake = [self.head,
                      Point(self.head.x - BLOCK_SIZE, self.head.y),
                      Point(self.head.x - (2 * BLOCK_SIZE), self.head.y)]
        self.score = 0
        self.food = None
        self._place_food()

    def _place_food(self):
        # Drop food on a random grid cell; retry if it lands on the snake
        x = random.randint(0, (self.w - BLOCK_SIZE) // BLOCK_SIZE) * BLOCK_SIZE
        y = random.randint(0, (self.h - BLOCK_SIZE) // BLOCK_SIZE) * BLOCK_SIZE
        self.food = Point(x, y)
        if self.food in self.snake:
            self._place_food()

    def reset(self):
        # Restore the initial game state and return the starting observation
        self.direction = Direction.RIGHT
        self.head = Point(self.w / 2, self.h / 2)
        self.snake = [self.head,
                      Point(self.head.x - BLOCK_SIZE, self.head.y),
                      Point(self.head.x - (2 * BLOCK_SIZE), self.head.y)]
        self.score = 0
        self._place_food()
        return self.get_state()

    def get_state(self):
        # A simple state representation: head position, food position, direction
        return (self.head.x, self.head.y, self.food.x, self.food.y, self.direction.value)

    def take_action(self, action):
        # 'action' is an integer index (0=RIGHT, 1=LEFT, 2=UP, 3=DOWN),
        # matching the output indices of the agent's network
        if action == 0:
            self.direction = Direction.RIGHT
        elif action == 1:
            self.direction = Direction.LEFT
        elif action == 2:
            self.direction = Direction.UP
        elif action == 3:
            self.direction = Direction.DOWN
        self._move(self.direction)

    def _move(self, direction):
        x = self.head.x
        y = self.head.y
        if direction == Direction.RIGHT:
            x += BLOCK_SIZE
        elif direction == Direction.LEFT:
            x -= BLOCK_SIZE
        elif direction == Direction.DOWN:
            y += BLOCK_SIZE
        elif direction == Direction.UP:
            y -= BLOCK_SIZE
        self.head = Point(x, y)
        # Prepend the new head; step() pops the tail unless food was eaten
        self.snake.insert(0, self.head)

    def step(self):
        # Calculate reward and check for game over
        if self._is_collision():
            return self.get_state(), -10, True   # game over
        elif self.head == self.food:
            self.score += 1
            self._place_food()
            return self.get_state(), 10, False   # reward for eating food; tail kept, so snake grows
        else:
            self.snake.pop()
            return self.get_state(), -1, False   # small negative reward to encourage movement

    def _is_collision(self):
        # Check if the snake hits the wall or itself
        if (self.head.x > self.w - BLOCK_SIZE or self.head.x < 0
                or self.head.y > self.h - BLOCK_SIZE or self.head.y < 0):
            return True
        if self.head in self.snake[1:]:
            return True
        return False
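
The environment can be sanity-checked on its own before the agent is involved. The loop below is a minimal sketch, not part of the original listing: it drives the game with random action indices for a few steps, resetting whenever an episode ends.

# Smoke test (assumed, not from the original): drive the env with random actions
game = SnakeGame()
state = game.reset()
for _ in range(20):
    action = random.randrange(4)          # random action index: 0=RIGHT, 1=LEFT, 2=UP, 3=DOWN
    game.take_action(action)
    state, reward, done = game.step()
    if done:
        state = game.reset()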

class QLearningAgent:
    def __init__(self, action_space):
        # action_space is the list of valid action indices, e.g. [0, 1, 2, 3]
        self.action_space = action_space
        self.epsilon = 1.0         # exploration rate
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.995
        self.gamma = 0.9           # discount factor
        self.learning_rate = 0.001
        # Build the model last: it reads self.learning_rate
        self.model = self.build_model()

    def build_model(self):
        model = tf.keras.Sequential([
            layers.Input(shape=(5,)),             # 5 state features from get_state()
            layers.Dense(24, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(4, activation='linear')  # one Q-value per action (4 possible actions)
        ])
        model.compile(loss='mse',
                      optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def act(self, state):
        # Epsilon-greedy selection over action indices
        if np.random.rand() <= self.epsilon:
            return random.choice(self.action_space)   # exploration
        state = np.array(state).reshape(1, -1)
        q_values = self.model.predict(state, verbose=0)
        return int(np.argmax(q_values[0]))            # exploitation

    def replay(self, state, action, reward, next_state, done):
        # One-step Q-learning update: target = r + gamma * max_a' Q(s', a')
        target = reward
        if not done:
            next_state = np.array(next_state).reshape(1, -1)
            target = reward + self.gamma * np.max(self.model.predict(next_state, verbose=0)[0])

        state = np.array(state).reshape(1, -1)
        target_f = self.model.predict(state, verbose=0)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)

        # Decay exploration over time
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
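
The listing stops after the Q-value update, so no training driver is shown. Below is a minimal sketch of how the pieces might be wired together, where `replay` applies the one-step Q-learning target r + gamma * max Q(s', a') after every move; the episode count and step cap are assumptions, not part of the original document.

# Hypothetical training loop (assumed): wires SnakeGame and QLearningAgent together
agent = QLearningAgent(action_space=[0, 1, 2, 3])
game = SnakeGame()

for episode in range(500):                 # episode count is an assumption
    state = game.reset()
    done = False
    steps = 0
    while not done and steps < 1000:       # step cap avoids endless wandering
        action = agent.act(state)
        game.take_action(action)
        next_state, reward, done = game.step()
        agent.replay(state, action, reward, next_state, done)
        state = next_state
        steps += 1
    print(f'episode {episode}: score={game.score}, epsilon={agent.epsilon:.3f}')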
