command_classifier

The document outlines a Python module for a command classification model using machine learning techniques, specifically employing a Random Forest Classifier. It includes methods for training the model, predicting intents from user commands, and saving/loading the model. The implementation utilizes NLTK for text processing and supports hyperparameter tuning through grid search.


#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LEO Command Classifier Model

This module implements a more sophisticated command classification model
using machine learning techniques.
"""

import os
import pickle
import numpy as np
import logging
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

class CommandClassifier:
    """
    A machine learning model for classifying user commands into intents.
    """

    def __init__(self, model_path=None):
        """
        Initialize the command classifier.

        Args:
            model_path (str): Path to a saved model file
        """
        self.model = None
        self.lemmatizer = WordNetLemmatizer()
        self.stopwords = set(stopwords.words('english'))

        # Ensure NLTK data is available
        self._ensure_nltk_data()

        # Load model if provided
        if model_path and os.path.exists(model_path):
            self.load_model(model_path)

    def _ensure_nltk_data(self):
        """Ensure required NLTK data is downloaded."""
        try:
            nltk.data.find('tokenizers/punkt')
            nltk.data.find('corpora/wordnet')
            nltk.data.find('corpora/stopwords')
        except LookupError:
            nltk.download('punkt')
            nltk.download('wordnet')
            nltk.download('stopwords')
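    # Note (assumption based on the installed NLTK version): recent NLTK
    # releases (3.8.2 and later) read word_tokenize data from
    # 'tokenizers/punkt_tab' rather than 'tokenizers/punkt'. If tokenization
    # still raises LookupError after the downloads above, adding
    # nltk.download('punkt_tab') here may be necessary.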

    def _tokenize(self, text):
        """
        Tokenize, lemmatize, and remove stopwords from text.

        Args:
            text (str): The text to process

        Returns:
            list: Processed tokens
        """
        tokens = word_tokenize(text.lower())
        return [self.lemmatizer.lemmatize(token) for token in tokens
                if token.isalpha() and token not in self.stopwords]
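    # Illustrative example: _tokenize("Please open the browser!") returns
    # something like ['please', 'open', 'browser'] -- 'the' is dropped as a
    # stopword and '!' fails the isalpha() check.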

    def train(self, X, y, grid_search=False):
        """
        Train the command classifier.

        Args:
            X (list): List of command texts
            y (list): List of corresponding intent labels
            grid_search (bool): Whether to perform grid search for
                hyperparameter tuning

        Returns:
            float: Accuracy on the training data
        """
        # Create pipeline
        pipeline = Pipeline([
            ('vectorizer', TfidfVectorizer(tokenizer=self._tokenize)),
            ('classifier', RandomForestClassifier(n_estimators=100,
                                                  random_state=42))
        ])

        if grid_search:
            # Define parameter grid
            param_grid = {
                'vectorizer__max_features': [None, 1000, 5000],
                'vectorizer__ngram_range': [(1, 1), (1, 2)],
                'classifier__n_estimators': [50, 100, 200],
                'classifier__max_depth': [None, 10, 20]
            }

            # Perform grid search
            self.model = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
            self.model.fit(X, y)

            # Log best parameters
            logging.info(f"Best parameters: {self.model.best_params_}")

            # Use best estimator
            self.model = self.model.best_estimator_
        else:
            # Train with default parameters
            self.model = pipeline
            self.model.fit(X, y)

        # Calculate accuracy on the training data (an optimistic estimate,
        # since the model is scored on the same samples it was fitted on)
        accuracy = self.model.score(X, y)
        logging.info(f"Model trained with accuracy: {accuracy:.2f}")
        return accuracy
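    # Illustrative note: because score(X, y) above reuses the training data,
    # the logged accuracy is optimistic. A hypothetical caller wanting an
    # honest estimate could hold out a test split:
    #
    #     from sklearn.model_selection import train_test_split
    #     X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2,
    #                                                random_state=42)
    #     classifier.train(X_tr, y_tr, grid_search=True)
    #     test_accuracy = classifier.model.score(X_te, y_te)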

    def predict(self, command):
        """
        Predict the intent of a command.

        Args:
            command (str): The command to classify

        Returns:
            tuple: (intent, confidence)
        """
        if not self.model:
            return "unknown", 0.0

        # Predict probabilities
        probs = self.model.predict_proba([command])[0]
        max_idx = np.argmax(probs)

        # Get intent and confidence
        intent = self.model.classes_[max_idx]
        confidence = probs[max_idx]

        return intent, confidence
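    # Illustrative note: the returned confidence lets callers reject
    # uncertain predictions, e.g. (0.5 is an assumed threshold):
    #
    #     intent, conf = classifier.predict("dim the lights")
    #     if conf < 0.5:
    #         intent = "unknown"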

    def save_model(self, model_path):
        """
        Save the model to a file.

        Args:
            model_path (str): Path to save the model

        Returns:
            bool: True if successful, False otherwise
        """
        if not self.model:
            logging.error("No model to save")
            return False

        try:
            # Only create a directory when the path actually contains one
            # (os.makedirs('') would raise an error)
            dir_name = os.path.dirname(model_path)
            if dir_name:
                os.makedirs(dir_name, exist_ok=True)
            with open(model_path, 'wb') as f:
                pickle.dump(self.model, f)
            logging.info(f"Model saved to {model_path}")
            return True
        except Exception as e:
            logging.error(f"Error saving model: {str(e)}")
            return False

    def load_model(self, model_path):
        """
        Load a model from a file.

        Args:
            model_path (str): Path to the model file

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            with open(model_path, 'rb') as f:
                self.model = pickle.load(f)
            logging.info(f"Model loaded from {model_path}")
            return True
        except Exception as e:
            logging.error(f"Error loading model: {str(e)}")
            return False
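
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module): a minimal
# demonstration of the train / predict / save / load cycle. The commands,
# intent labels, and the "models/classifier.pkl" path below are assumed
# placeholder values.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Toy training data: command texts paired with intent labels
    X = [
        "open the browser", "launch firefox for me",
        "what time is it", "tell me the current time",
        "play some music", "start my playlist",
    ]
    y = ["open_app", "open_app",
         "get_time", "get_time",
         "play_music", "play_music"]

    classifier = CommandClassifier()
    classifier.train(X, y)

    intent, confidence = classifier.predict("could you open chrome")
    print(f"intent={intent}, confidence={confidence:.2f}")

    # Persist the trained pipeline and reload it into a fresh instance
    if classifier.save_model("models/classifier.pkl"):
        restored = CommandClassifier(model_path="models/classifier.pkl")
        print(restored.predict("play a song"))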
