import numpy as np
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import layers
import pygame
from enum import Enum
from collections import namedtuple
# Q-learning setup
class Direction(Enum):
    """Absolute movement directions for the snake.

    The integer ``.value`` (1-4) is what ``SnakeGame.get_state`` packs into
    the state tuple fed to the Q-network.
    """
    RIGHT = 1
    LEFT = 2
    UP = 3
    DOWN = 4
# Grid cell geometry of a snake segment / food item, in pixels.
BLOCK_SIZE = 20
# Game clock tick rate (frames per second).
SPEED = 10

# RGB colors used by the pygame renderer.
WHITE = (255, 255, 255)
RED = (200, 0, 0)
BLUE1 = (0, 0, 255)
BLUE2 = (0, 100, 255)
BLACK = (0, 0, 0)

# Fix: the file builds Point(x, y) values (food, snake segments) but never
# defined Point, which would raise NameError at runtime. namedtuple is
# already imported at the top of the file.
Point = namedtuple('Point', ['x', 'y'])
class SnakeGame:
    """Grid-based Snake environment with a gym-like reset/step interface.

    The board is ``w`` x ``h`` pixels partitioned into BLOCK_SIZE cells.
    Callers must invoke ``reset()`` before ``take_action()``/``step()``.
    """

    def __init__(self, w=640, h=480):
        self.w = w
        self.h = h
        # init display
        self.display = pygame.display.set_mode((self.w, self.h))
        pygame.display.set_caption('Snake')
        self.clock = pygame.time.Clock()

    def _place__food(self):
        """Drop food on a random grid-aligned cell the snake does not occupy."""
        # NOTE(review): the double underscore in the name looks like a typo
        # for _place_food; kept as-is so any existing callers are unaffected.
        # Iterate instead of recursing: the original recursed on collision,
        # which risks RecursionError once the snake grows long.
        while True:
            x = random.randint(0, (self.w - BLOCK_SIZE) // BLOCK_SIZE) * BLOCK_SIZE
            y = random.randint(0, (self.h - BLOCK_SIZE) // BLOCK_SIZE) * BLOCK_SIZE
            self.food = Point(x, y)
            if self.food not in self.snake:
                return

    def reset(self):
        """Start a new episode: 3-segment snake facing RIGHT; returns the state."""
        self.direction = Direction.RIGHT
        self.head = Point(self.w / 2, self.h / 2)
        self.snake = [self.head,
                      Point(self.head.x - BLOCK_SIZE, self.head.y),
                      Point(self.head.x - (2 * BLOCK_SIZE), self.head.y)]
        self.score = 0
        self._place__food()
        return self.get_state()

    def get_state(self):
        # A simple state representation: head position, direction, food position
        return (self.head.x, self.head.y, self.food.x, self.food.y, self.direction.value)

    def take_action(self, action):
        """Set the heading from ``action`` (a Direction) and advance one cell."""
        if action == Direction.RIGHT:
            self.direction = Direction.RIGHT
        elif action == Direction.LEFT:
            self.direction = Direction.LEFT
        elif action == Direction.UP:
            self.direction = Direction.UP
        elif action == Direction.DOWN:
            self.direction = Direction.DOWN
        self._move(self.direction)

    def _move(self, direction):
        """Advance the head one BLOCK_SIZE in ``direction`` and prepend it.

        Added in review: take_action() called self._move(), but no such method
        existed, so every action raised AttributeError. step() pops the tail
        afterwards, so prepending here keeps the length constant except when
        food is eaten (step() skips the pop, growing the snake by one).
        """
        x = self.head.x
        y = self.head.y
        # Pygame's y axis grows downward, so UP decreases y.
        if direction == Direction.RIGHT:
            x += BLOCK_SIZE
        elif direction == Direction.LEFT:
            x -= BLOCK_SIZE
        elif direction == Direction.UP:
            y -= BLOCK_SIZE
        elif direction == Direction.DOWN:
            y += BLOCK_SIZE
        self.head = Point(x, y)
        self.snake.insert(0, self.head)

    def step(self):
        """Resolve the move made by take_action(); returns (state, reward, done)."""
        # Calculate reward and check for game over
        if self._is_collision():
            return self.get_state(), -10, True  # Game over
        elif self.head == self.food:
            self.score += 1
            self._place__food()
            return self.get_state(), 10, False  # Reward for eating food
        else:
            self.snake.pop()
            return self.get_state(), -1, False  # Small negative reward to encourage movement

    def _is_collision(self):
        # Check if the snake hits the wall or itself
        if (self.head.x > self.w - BLOCK_SIZE or self.head.x < 0
                or self.head.y > self.h - BLOCK_SIZE or self.head.y < 0):
            return True
        if self.head in self.snake[1:]:
            return True
        return False
class QLearningAgent:
def __init__(self, action_space):
    """Create a DQN agent.

    action_space: the set/size of actions the caller samples from when
    exploring (stored as-is; not interpreted here).
    """
    self.action_space = action_space
    # Hyperparameters must be assigned BEFORE build_model(): build_model()
    # reads self.learning_rate, and the original called it first, raising
    # AttributeError on construction.
    self.epsilon = 1.0  # Exploration rate
    self.epsilon_min = 0.1
    self.epsilon_decay = 0.995
    self.gamma = 0.9  # Discount factor
    self.learning_rate = 0.001
    self.model = self.build_model()
def build_model(self):
    """Build and compile the Q-network: 5 state inputs -> 4 action values.

    Reads self.learning_rate, so hyperparameters must be set before this
    is called.
    """
    network = tf.keras.Sequential()
    network.add(layers.Dense(24, input_dim=5, activation='relu'))
    network.add(layers.Dense(24, activation='relu'))
    network.add(layers.Dense(4, activation='linear'))  # 4 possible actions
    network.compile(loss='mse',
                    optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
    return network