command_classifier

This document presents a Python module implementing a command classification model built on a scikit-learn Random Forest classifier. It includes methods for training the model, predicting intents from user commands, and saving/loading the model; it uses NLTK for text preprocessing and supports hyperparameter tuning through grid search.
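For orientation, a minimal usage sketch follows; the importing module name, command strings, and intent labels are made up for illustration, and the class itself is defined below.

from command_classifier import CommandClassifier

# Toy training data (hypothetical)
commands = ["turn on the lights", "what time is it", "play some music"]
intents = ["lights_on", "time_query", "play_music"]

clf = CommandClassifier()
clf.train(commands, intents)                      # fit the pipeline
print(clf.predict("switch the lights on"))        # -> (intent, confidence)
clf.save_model("models/command_classifier.pkl")   # persist for later use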


#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LEO Command Classifier Model

This module implements a more sophisticated command classification model
using machine learning techniques.
"""

import os
import pickle
import numpy as np
import logging
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

class CommandClassifier:
    """
    A machine learning model for classifying user commands into intents.
    """

    def __init__(self, model_path=None):
        """
        Initialize the command classifier.

        Args:
            model_path (str): Path to a saved model file
        """
        self.model = None
        self.lemmatizer = WordNetLemmatizer()
        self.stopwords = set(stopwords.words('english'))

        # Ensure NLTK data is available
        self._ensure_nltk_data()

        # Load model if provided
        if model_path and os.path.exists(model_path):
            self.load_model(model_path)

    def _ensure_nltk_data(self):
        """Ensure required NLTK data is downloaded."""
        try:
            nltk.data.find('tokenizers/punkt')
            nltk.data.find('corpora/wordnet')
            nltk.data.find('corpora/stopwords')
        except LookupError:
            nltk.download('punkt')
            nltk.download('wordnet')
            nltk.download('stopwords')

    def _tokenize(self, text):
        """
        Tokenize, lemmatize, and remove stopwords from text.

        Args:
            text (str): The text to process

        Returns:
            list: Processed tokens
        """
        tokens = word_tokenize(text.lower())
        return [self.lemmatizer.lemmatize(token) for token in tokens
                if token.isalpha() and token not in self.stopwords]
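
    # Example (hypothetical input): self._tokenize("Turn off the lights!")
    # returns ['turn', 'light'] -- 'off' and 'the' are NLTK English
    # stopwords, '!' fails isalpha(), and 'lights' lemmatizes to 'light'.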

    def train(self, X, y, grid_search=False):
        """
        Train the command classifier.

        Args:
            X (list): List of command texts
            y (list): List of corresponding intent labels
            grid_search (bool): Whether to perform grid search for
                hyperparameter tuning

        Returns:
            float: Model accuracy
        """
        # Create pipeline; token_pattern=None silences the warning
        # scikit-learn emits when a custom tokenizer is supplied
        pipeline = Pipeline([
            ('vectorizer', TfidfVectorizer(tokenizer=self._tokenize,
                                           token_pattern=None)),
            ('classifier', RandomForestClassifier(n_estimators=100,
                                                  random_state=42))
        ])

        if grid_search:
            # Define parameter grid
            param_grid = {
                'vectorizer__max_features': [None, 1000, 5000],
                'vectorizer__ngram_range': [(1, 1), (1, 2)],
                'classifier__n_estimators': [50, 100, 200],
                'classifier__max_depth': [None, 10, 20]
            }

            # Perform grid search
            self.model = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
            self.model.fit(X, y)

            # Log best parameters
            logging.info(f"Best parameters: {self.model.best_params_}")

            # Use best estimator
            self.model = self.model.best_estimator_
        else:
            # Train with default parameters
            self.model = pipeline
            self.model.fit(X, y)

        # Calculate accuracy on the training data (an optimistic estimate;
        # use a held-out test set for a realistic figure)
        accuracy = self.model.score(X, y)
        logging.info(f"Model trained with accuracy: {accuracy:.2f}")
        return accuracy
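
    # Example (hypothetical data): with a labeled command set,
    # clf.train(commands, intents, grid_search=True) fits GridSearchCV over
    # the grid above (3 * 2 * 3 * 3 = 54 candidates, 5-fold CV) and keeps
    # the best-scoring pipeline.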

    def predict(self, command):
        """
        Predict the intent of a command.

        Args:
            command (str): The command to classify

        Returns:
            tuple: (intent, confidence)
        """
        if not self.model:
            return "unknown", 0.0

        # Predict probabilities
        probs = self.model.predict_proba([command])[0]
        max_idx = np.argmax(probs)

        # Get intent and confidence
        intent = self.model.classes_[max_idx]
        confidence = probs[max_idx]

        return intent, confidence
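
    # Example (hypothetical): clf.predict("switch the lights on") might
    # return ("lights_on", 0.87); the label set and the probability depend
    # entirely on the training data.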

    def save_model(self, model_path):
        """
        Save the model to a file.

        Args:
            model_path (str): Path to save the model

        Returns:
            bool: True if successful, False otherwise
        """
        if not self.model:
            logging.error("No model to save")
            return False

        try:
            # Create the target directory only if the path includes one;
            # os.makedirs('') would raise FileNotFoundError
            directory = os.path.dirname(model_path)
            if directory:
                os.makedirs(directory, exist_ok=True)
            with open(model_path, 'wb') as f:
                pickle.dump(self.model, f)
            logging.info(f"Model saved to {model_path}")
            return True
        except Exception as e:
            logging.error(f"Error saving model: {str(e)}")
            return False

    def load_model(self, model_path):
        """
        Load a model from a file.

        Args:
            model_path (str): Path to the model file

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            with open(model_path, 'rb') as f:
                self.model = pickle.load(f)
            logging.info(f"Model loaded from {model_path}")
            return True
        except Exception as e:
            logging.error(f"Error loading model: {str(e)}")
            return False
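
Note that save_model pickles the fitted Pipeline, and because the vectorizer holds a reference to the bound _tokenize method, the pickle also captures the CommandClassifier instance; the saved file can only be loaded where this class is importable. A minimal alternative sketch using joblib, which the scikit-learn documentation recommends for estimators carrying large numpy arrays (clf here is assumed to be a trained CommandClassifier):

import joblib

# Hypothetical alternative to pickle: joblib serializes scikit-learn
# models (and their numpy arrays) more efficiently than plain pickle.
joblib.dump(clf.model, "models/command_classifier.joblib")

restored = CommandClassifier()
restored.model = joblib.load("models/command_classifier.joblib")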
