0% found this document useful (0 votes)
33 views2 pages

Ass 8

The document loads and preprocesses a speech commands dataset to extract MFCC features, splits the data into train, validation and test sets, defines an LSTM model, trains and evaluates the model, and provides a function to make predictions on new audio samples.

Uploaded by

Taqwa Elsayed
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
33 views2 pages

Ass 8

The document loads and preprocesses a speech commands dataset to extract MFCC features, splits the data into train, validation and test sets, defines an LSTM model, trains and evaluates the model, and provides a function to make predictions on new audio samples.

Uploaded by

Taqwa Elsayed
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

import os

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy

# Download and unzip the dataset


!wget
https://storage.googleapis.com/download.tensorflow.org/data/mini_speech_commands.zi
p
!unzip mini_speech_commands.zip

# Define constants
DATA_DIR = "mini_speech_commands"
# Class labels are the sub-directories of DATA_DIR. Stray files (e.g. a README
# shipped next to the class folders) are not labels, and "_background_noise_"
# is never used as a class, so both are excluded. Sorting makes the
# label -> index mapping reproducible, since os.listdir order is arbitrary.
LABELS = np.array(sorted(
    entry for entry in os.listdir(DATA_DIR)
    if entry != "_background_noise_"
    and os.path.isdir(os.path.join(DATA_DIR, entry))
))
NUM_CLASSES = len(LABELS)
SAMPLE_RATE = 16000   # samples per second of the audio clips
MFCC_MAX_LEN = 40     # number of MFCC coefficients requested per frame
BATCH_SIZE = 32
EPOCHS = 10
VAL_SPLIT = 0.2       # fraction of the training data held out for validation

# Function to load and preprocess audio files


def preprocess_data():
    """Load every WAV clip under DATA_DIR and convert it to MFCC features.

    Each clip is decoded as mono, trimmed or zero-padded to exactly
    SAMPLE_RATE samples (one second), and passed through
    STFT -> mel filterbank -> log -> MFCC. Only the first MFCC_MAX_LEN
    coefficients of each frame are kept.

    Returns:
        A tuple ``(mfccs, labels)``. ``mfccs`` is a float array of shape
        ``(num_clips, num_frames, MFCC_MAX_LEN)`` and ``labels`` is an array
        holding the label (directory name) of each clip, in the same order.
    """
    # Spectrogram settings, following the example in the
    # tf.signal.mfccs_from_log_mel_spectrograms documentation.
    frame_length = 1024
    frame_step = 256
    fft_length = 1024
    num_mel_bins = 80

    # The mel filterbank depends only on constants, so build it once.
    # An STFT with fft_length points yields fft_length // 2 + 1 frequency bins.
    mel_weights = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel_bins,
        num_spectrogram_bins=fft_length // 2 + 1,
        sample_rate=SAMPLE_RATE,
        lower_edge_hertz=80.0,
        upper_edge_hertz=7600.0,
    )

    labels = []
    mfccs = []
    for label in LABELS:
        if label == "_background_noise_":
            continue
        label_dir = os.path.join(DATA_DIR, label)
        # LABELS may contain plain files (e.g. a README); only directories
        # hold clips.
        if not os.path.isdir(label_dir):
            continue
        # Sorted so the sample order (and therefore the later split) is
        # reproducible across machines.
        for filename in sorted(os.listdir(label_dir)):
            if not filename.lower().endswith(".wav"):
                continue
            filepath = os.path.join(label_dir, filename)
            audio, _ = tf.audio.decode_wav(tf.io.read_file(filepath),
                                           desired_channels=1)
            audio = tf.squeeze(audio, axis=-1)

            # Force a fixed length so every clip yields the same number of
            # frames and the features stack into one dense array.
            audio = audio[:SAMPLE_RATE]
            audio = tf.pad(audio, [[0, SAMPLE_RATE - tf.shape(audio)[0]]])

            # Compute MFCC: the TF op expects log-mel spectrograms, not the
            # raw waveform, and has no n_mfcc argument; slice instead.
            stft = tf.signal.stft(audio, frame_length=frame_length,
                                  frame_step=frame_step,
                                  fft_length=fft_length)
            mel = tf.matmul(tf.abs(stft), mel_weights)
            log_mel = tf.math.log(mel + 1e-6)  # epsilon avoids log(0)
            mfcc = tf.signal.mfccs_from_log_mel_spectrograms(log_mel)
            mfcc = mfcc[..., :MFCC_MAX_LEN]

            mfccs.append(mfcc.numpy())
            labels.append(label)
    return np.array(mfccs), np.array(labels)

# Load and preprocess the data
mfccs, labels = preprocess_data()

# Convert labels to indices (position of each label inside LABELS)
label_to_index = {label: i for i, label in enumerate(LABELS)}
indices = np.array([label_to_index[label] for label in labels])

# Split the dataset into train, validation, and test sets.
# First hold out 20% of all samples for testing, then carve VAL_SPLIT of the
# remaining samples off as the validation set. random_state is fixed so the
# split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    mfccs, indices, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=VAL_SPLIT, random_state=42)

# Define LSTM model


class LSTMModel(Model):
    """Sequence classifier: one LSTM layer followed by a softmax Dense layer.

    Args:
        num_classes: Number of output classes (width of the softmax layer).
        lstm_units: Number of units in the LSTM layer. Defaults to 128.
    """

    def __init__(self, num_classes, lstm_units=128):
        super().__init__()
        self.lstm = layers.LSTM(lstm_units)
        self.dense = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        """Run the LSTM over ``inputs`` and return class probabilities."""
        x = self.lstm(inputs)
        x = self.dense(x)
        return x

# Build the classifier with one output per entry in LABELS.
model = LSTMModel(num_classes=NUM_CLASSES)

# Configure training: Adam optimizer, sparse categorical cross-entropy on the
# integer class indices, and accuracy as the reported metric.
model.compile(
    optimizer=Adam(),
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Fit on the training split and report validation metrics after every epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
)

# Score the trained model on the held-out test split.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

# Inference on new audio samples


def predict_audio(audio):
    """Predict the label of a single audio clip.

    Args:
        audio: Mono waveform as a 1-D tensor or array of samples; a trailing
            channel axis of size 1 is also accepted.
            NOTE(review): assumes float samples at SAMPLE_RATE in the same
            range produced by tf.audio.decode_wav -- confirm against callers.

    Returns:
        The entry of LABELS with the highest predicted probability.
    """
    # Spectrogram settings, following the example in the
    # tf.signal.mfccs_from_log_mel_spectrograms documentation. These must
    # match whatever settings produced the features the model was trained on.
    frame_length = 1024
    frame_step = 256
    fft_length = 1024
    num_mel_bins = 80

    # Flatten to 1-D and force exactly SAMPLE_RATE samples so the number of
    # frames is fixed regardless of the clip's original length.
    audio = tf.reshape(tf.cast(audio, tf.float32), [-1])
    audio = audio[:SAMPLE_RATE]
    audio = tf.pad(audio, [[0, SAMPLE_RATE - tf.shape(audio)[0]]])

    # Compute MFCC: the TF op expects log-mel spectrograms, not the raw
    # waveform, and has no n_mfcc argument; slice the coefficients instead.
    stft = tf.signal.stft(audio, frame_length=frame_length,
                          frame_step=frame_step, fft_length=fft_length)
    mel_weights = tf.signal.linear_to_mel_weight_matrix(
        num_mel_bins=num_mel_bins,
        num_spectrogram_bins=fft_length // 2 + 1,
        sample_rate=SAMPLE_RATE,
        lower_edge_hertz=80.0,
        upper_edge_hertz=7600.0,
    )
    log_mel = tf.math.log(tf.matmul(tf.abs(stft), mel_weights) + 1e-6)
    mfcc = tf.signal.mfccs_from_log_mel_spectrograms(log_mel)
    mfcc = mfcc[..., :MFCC_MAX_LEN]

    # The model expects a leading batch axis: (1, frames, coefficients).
    prediction = model.predict(tf.expand_dims(mfcc, axis=0))
    predicted_label_index = np.argmax(prediction)
    predicted_label = LABELS[predicted_label_index]
    return predicted_label

# Example usage for inference


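# audio_sample, _ = tf.audio.decode_wav(
#     tf.io.read_file("path_to_audio_file"), desired_channels=1)
# audio_sample = tf.squeeze(audio_sample, axis=-1)
# predicted_label = predict_audio(audio_sample)
# print("Predicted Label:", predicted_label)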

You might also like