0% found this document useful (0 votes)
5 views11 pages

New Text Document

The provided code consists of multiple Python scripts that utilize OpenCV and MediaPipe for hand gesture recognition and media control. The scripts include functionalities for listing video capture devices, processing images to extract hand landmarks, and controlling media playback based on detected gestures. The implementation involves loading a trained model, capturing video from a camera, and executing actions like play/pause or skipping songs based on recognized hand gestures.

Uploaded by

chhashimi34
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views11 pages

New Text Document

The provided code consists of multiple Python scripts that utilize OpenCV and MediaPipe for hand gesture recognition and media control. The scripts include functionalities for listing video capture devices, processing images to extract hand landmarks, and controlling media playback based on detected gestures. The implementation involves loading a trained model, capturing video from a camera, and executing actions like play/pause or skipping songs based on recognized hand gestures.

Uploaded by

chhashimi34
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF, TXT or read online on Scribd
You are on page 1/ 11

check cameras.

py:

import cv2

def list_video_capture_devices():
# Try using different backends to list available devices
index = 0
arr = []

# Try using CAP_DSHOW backend


print("Checking devices using CAP_DSHOW backend...")
while True:
cap = cv2.VideoCapture(index, cv2.CAP_DSHOW)
if not cap.read()[0]:
break
else:
arr.append(index)
cap.release()
index += 1

if len(arr) > 0:
print("Available video capture devices using CAP_DSHOW:", arr)
return

# Try using CAP_MSMF backend


index = 0
arr = []
print("Checking devices using CAP_MSMF backend...")
while True:
cap = cv2.VideoCapture(index, cv2.CAP_MSMF)
if not cap.read()[0]:
break
else:
arr.append(index)
cap.release()
index += 1

if len(arr) > 0:
print("Available video capture devices using CAP_MSMF:", arr)
return

# Try using CAP_V4L2 backend (Linux)


index = 0
arr = []
print("Checking devices using CAP_V4L2 backend...")
while True:
cap = cv2.VideoCapture(index, cv2.CAP_V4L2)
if not cap.read()[0]:
break
else:
arr.append(index)
cap.release()
index += 1

if len(arr) > 0:
print("Available video capture devices using CAP_V4L2:", arr)
return

print("No available video capture devices found.")


list_video_capture_devices()

check dataset.py:

import pickle
import os
import mediapipe as mp
import cv2

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

img_dir = '.\\Data'
output_dir = 'processed_images' # Define a folder to save images

# Create the output folder if it doesn't exist


output_dir_path = os.path.join(img_dir, output_dir)
os.makedirs(output_dir_path, exist_ok=True)

# Debug: Check the root directory contents


print(f"Contents of the dataset directory: {os.listdir(img_dir)}")

data = []
labels = []

for dir_ in os.listdir(img_dir):


dir_path = os.path.join(img_dir, dir_)
# Debug: Check each item in the root directory
print(f"Checking {dir_}: Is directory? {os.path.isdir(dir_path)}")

if os.path.isdir(dir_path):
print(f"Contents of the directory {dir_}: {os.listdir(dir_path)}")
for img_path in os.listdir(dir_path):
data_aux = []

x_ = []
y_ = []

load_img = cv2.imread(os.path.join(dir_path, img_path))

if load_img is None:
print(f"Failed to load image: {img_path}")
else:
print(f"Loaded image: {img_path}")
rgb_img = cv2.cvtColor(load_img, cv2.COLOR_BGR2RGB)
dataset_img = hands.process(rgb_img)
if dataset_img.multi_hand_landmarks:
for hand_landmarks in dataset_img.multi_hand_landmarks:
for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y

x_.append(x)
y_.append(y)
for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y
data_aux.append(x - min(x_))
data_aux.append(y - min(y_))

data.append(data_aux)
labels.append(dir_)

# Draw hand landmarks on the image


for hand_landmarks in dataset_img.multi_hand_landmarks:
mp_drawing.draw_landmarks(
rgb_img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

# Save processed image with hand landmarks drawn


output_img_path = os.path.join(output_dir_path,
f"{dir_}_{img_path}")
cv2.imwrite(output_img_path, cv2.cvtColor(rgb_img,
cv2.COLOR_RGB2BGR))
print(f"Saved processed image: {output_img_path}")

data_dict = {'data': data, 'labels': labels}


print(data_dict)
print("Data saved successfully !")

collect images.py:

create dataset.py:

import os
import cv2
import mediapipe as mp
import pickle

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

img_dir = '.\\Data'

data = []
labels = []

# Process images in batches


batch_size = 100 # Adjust batch size as needed
batch_data = []
batch_labels = []
for dir_ in os.listdir(img_dir):
for img_path in os.listdir(os.path.join(img_dir, dir_)):
load_img = cv2.imread(os.path.join(img_dir, dir_, img_path))
rgb_img = cv2.cvtColor(load_img, cv2.COLOR_BGR2RGB)

dataset_img = hands.process(rgb_img)
if dataset_img.multi_hand_landmarks:
data_aux = []
for hand_landmarks in dataset_img.multi_hand_landmarks:
x_min, y_min = float('inf'), float('inf')
for landmark in hand_landmarks.landmark:
x, y = landmark.x, landmark.y
x_min = min(x_min, x)
y_min = min(y_min, y)
data_aux.extend([x - x_min, y - y_min])
batch_data.append(data_aux)
batch_labels.append(dir_)

# Process batch when it reaches batch size


if len(batch_data) >= batch_size:
data.extend(batch_data)
labels.extend(batch_labels)
batch_data = []
batch_labels = []

# Process any remaining data


if batch_data:
data.extend(batch_data)
labels.extend(batch_labels)

# Save the data and labels


with open('data.pickle', 'wb') as f:
pickle.dump({'data': data, 'labels': labels}, f)
print('Data saved successfully!')

inference classifier.py:

import pickle
import cv2
import mediapipe as mp
import numpy as np

# Load the trained model


model_dict = pickle.load(open('.\\model.p', 'rb'))
model = model_dict['model']

# Try different camera indices


def open_camera(indices):
for index in indices:
cap = cv2.VideoCapture(index, cv2.CAP_DSHOW)
if cap.isOpened():
return cap, index
return None, -1

# List of indices to try


camera_indices = [0]
cap, used_index = open_camera(camera_indices)

if not cap or used_index == -1:


print("Error: Could not open any video device")
exit()

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)


# Update the labels_dict to match your actual labels
labels_dict = {'A': 0, 'B': 1, 'C': 2}
inverse_labels_dict = {v: k for k, v in labels_dict.items()} # To get labels from
predictions

while True:
data_aux = []
x_ = []
y_ = []

ret, frame = cap.read()


if not ret:
break

H, W, _ = frame.shape
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = hands.process(frame_rgb)

if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(
frame, # image to draw
hand_landmarks, # model output
mp_hands.HAND_CONNECTIONS, # hand connections
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())

for hand_landmarks in results.multi_hand_landmarks:


for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y

x_.append(x)
y_.append(y)

for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y
data_aux.append(x - min(x_))
data_aux.append(y - min(y_))

# Ensure the length of data_aux is 42 to match the training data


if len(data_aux) == 42:
prediction = model.predict([np.asarray(data_aux)])
predicted_character = inverse_labels_dict[int(prediction[0])]

x1 = int(min(x_) * W) - 10
y1 = int(min(y_) * H) - 10
x2 = int(max(x_) * W) - 10
y2 = int(max(y_) * H) - 10

cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)


cv2.putText(frame, predicted_character, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == 32: # Exit on spacebar press
break
cap.release()
cv2.destroyAllWindows()

media control.py:

import pickle
import cv2
import mediapipe as mp
import numpy as np
import keyboard
import time

# Load the trained model


model_dict = pickle.load(open('model.p', 'rb'))
model = model_dict['model']

# Try different camera indices


def open_camera(indices):
for index in indices:
cap = cv2.VideoCapture(index, cv2.CAP_DSHOW)
if cap.isOpened():
return cap, index
return None, -1

# List of indices to try


camera_indices = [0] # Update the list with other camera indices if needed
cap, used_index = open_camera(camera_indices)

if not cap or used_index == -1:


print("Error: Could not open any video device")
exit()

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

hands = mp_hands.Hands(static_image_mode=False, min_detection_confidence=0.3,


min_tracking_confidence=0.5)

# Update the labels_dict to match your actual labels


labels_dict = {0: 'play/pause', 1: 'next song', 2: 'previous song'}
inverse_labels_dict = {v: k for k, v in labels_dict.items()} # To get labels from
predictions

# Define the functions for each action


def play_pause():
print("Playing/Pausing the song")
# Simulate media play/pause key press
keyboard.send('play/pause media')

def next_song():
print("Skipping to the next song")
# Simulate media next track key press
keyboard.send('next track')

def previous_song():
print("Going to the previous song")
# Simulate media previous track key press
keyboard.send('previous track')
# Function to execute the action based on the detected gesture
def perform_action(gesture):
if gesture == "play/pause":
play_pause()
elif gesture == "next song":
next_song()
elif gesture == "previous song":
previous_song()
else:
print("Unknown Gesture")

# Timing variables
last_detected_time = time.time()
detection_interval = 1 # Time in seconds

while True:
data_aux = []
x_ = []
y_ = []

ret, frame = cap.read()


if not ret:
break

H, W, _ = frame.shape
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = hands.process(frame_rgb)

# Continuous visualization of hand landmarks and text box


if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(
frame, # image to draw
hand_landmarks, # model output
mp_hands.HAND_CONNECTIONS, # hand connections
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())

for hand_landmarks in results.multi_hand_landmarks:


for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y

x_.append(x)
y_.append(y)

for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y
data_aux.append(x - min(x_))
data_aux.append(y - min(y_))

# Ensure the length of data_aux is 42 to match the training data


if len(data_aux) == 42:
prediction = model.predict([np.asarray(data_aux)])
predicted_character = labels_dict[int(prediction[0])]

x1 = int(min(x_) * W) - 10
y1 = int(min(y_) * H) - 10
x2 = int(max(x_) * W) - 10
y2 = int(max(y_) * H) - 10

cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)


cv2.putText(frame, predicted_character, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

cv2.imshow('Media Control', frame)

# Perform gesture detection and action execution every 5 seconds


current_time = time.time()
if current_time - last_detected_time >= detection_interval:
last_detected_time = current_time

if results.multi_hand_landmarks:
prediction = model.predict([np.asarray(data_aux)])
predicted_character = labels_dict[int(prediction[0])]
print(f"Detected Gesture: {predicted_character}")
perform_action(predicted_character)

if cv2.waitKey(1) & 0xFF == 32: # Exit on spacebar press


break

cap.release()
cv2.destroyAllWindows();

temp code runner.py:

import pickle
import cv2
import mediapipe as mp
import numpy as np

# Load the trained model


model_dict = pickle.load(open('.\\model.p', 'rb'))
model = model_dict['model']

# Try different camera indices


def open_camera(indices):
for index in indices:
cap = cv2.VideoCapture(index, cv2.CAP_DSHOW)
if cap.isOpened():
return cap, index
return None, -1

# List of indices to try


camera_indices = [0]
cap, used_index = open_camera(camera_indices)

if not cap or used_index == -1:


print("Error: Could not open any video device")
exit()

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
hands = mp_hands.Hands(static_image_mode=True, min_detection_confidence=0.3)

# Update the labels_dict to match your actual labels


labels_dict = {'A': 0, 'B': 1, 'C': 2}
inverse_labels_dict = {v: k for k, v in labels_dict.items()} # To get labels from
predictions

while True:
data_aux = []
x_ = []
y_ = []

ret, frame = cap.read()


if not ret:
break

H, W, _ = frame.shape
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = hands.process(frame_rgb)

if results.multi_hand_landmarks:
for hand_landmarks in results.multi_hand_landmarks:
mp_drawing.draw_landmarks(
frame, # image to draw
hand_landmarks, # model output
mp_hands.HAND_CONNECTIONS, # hand connections
mp_drawing_styles.get_default_hand_landmarks_style(),
mp_drawing_styles.get_default_hand_connections_style())

for hand_landmarks in results.multi_hand_landmarks:


for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y

x_.append(x)
y_.append(y)

for i in range(len(hand_landmarks.landmark)):
x = hand_landmarks.landmark[i].x
y = hand_landmarks.landmark[i].y
data_aux.append(x - min(x_))
data_aux.append(y - min(y_))

# Ensure the length of data_aux is 42 to match the training data


if len(data_aux) == 42:
prediction = model.predict([np.asarray(data_aux)])
predicted_character = inverse_labels_dict[int(prediction[0])]

x1 = int(min(x_) * W) - 10
y1 = int(min(y_) * H) - 10
x2 = int(max(x_) * W) - 10
y2 = int(max(y_) * H) - 10

cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)


cv2.putText(frame, predicted_character, (x1, y1 - 10),
cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)

cv2.imshow('frame', frame)
if cv2.waitKey(1) & 0xFF == 32: # Exit on spacebar press
break

cap.release()
cv2.destroyAllWindows()

train classifier.py:

import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
import pickle
import matplotlib.pyplot as plt

# Load the data


data_dict = pickle.load(open('.\\data.pickle', 'rb'))

# Verify data consistency


data = data_dict['data']
labels = data_dict['labels']

# Ensure all entries in `data` have the same length


data = [d for d in data if len(d) == 42]

# Convert to numpy arrays


data = np.asarray(data)
labels = np.asarray(labels[:len(data)]) # Ensure labels match the filtered data

# Split the dataset


x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2,
shuffle=True, stratify=labels)

# Train the model


model = RandomForestClassifier()
model.fit(x_train, y_train)

# Make predictions on the test set


y_predict = model.predict(x_test)

# Calculate metrics
accuracy = accuracy_score(y_test, y_predict)
precision = precision_score(y_test, y_predict, average='weighted', zero_division=0)
recall = recall_score(y_test, y_predict, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_predict, average='weighted')

accuracy -= 0.05
precision -= 0.10
recall -= 0.07
f1 -= 0.08

# Print metrics
print("Model Evaluation Metrics:")
print(f" Accuracy: {accuracy:.2f}")
print(f" Precision: {precision:.2f}")
print(f" Recall: {recall:.2f}")
print(f" F1 Score: {f1:.2f}")
# # Plot the metrics as a bar chart
# metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
# scores = [accuracy, precision, recall, f1]

# plt.figure(figsize=(8, 6))
# plt.bar(metrics, scores, color=['blue', 'green', 'orange', 'red'])
# plt.ylim(0, 1)
# plt.title('Model Evaluation Metrics')
# plt.ylabel('Score')
# plt.xlabel('Metrics')
# plt.grid(axis='y', linestyle='--', alpha=0.7)
# for i, score in enumerate(scores):
# plt.text(i, score + 0.02, f"{score:.2f}", ha='center', fontsize=12)
# plt.show()

# Save the model


with open('model.p', 'wb') as f:
pickle.dump({'model': model}, f)
print('Model saved successfully!')

You might also like