#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
LEO Command Classifier Model

Provides a machine learning model for classifying user commands into intents.
"""

import os
import pickle
import numpy as np
import logging
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
class CommandClassifier:
"""
A machine learning model for classifying user commands into intents.
"""
    def __init__(self, model_path=None):
        """
        Initialize the classifier and optionally load a saved model.

        Args:
            model_path (str): Path to a saved model file
        """
        # Make sure the required NLTK corpora are available before using them
        self._ensure_nltk_data()

        self.model = None
        self.lemmatizer = WordNetLemmatizer()
        self.stopwords = set(stopwords.words('english'))

        # Load an existing model if a path was provided
        if model_path:
            self.load_model(model_path)

def _ensure_nltk_data(self):
"""Ensure required NLTK data is downloaded."""
try:
nltk.data.find('tokenizers/punkt')
nltk.data.find('corpora/wordnet')
nltk.data.find('corpora/stopwords')
except LookupError:
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

    def _tokenize(self, text):
        """
        Lowercase, tokenize, and lemmatize text, dropping stopwords and
        non-alphabetic tokens.

        Args:
            text (str): The text to process

        Returns:
            list: Processed tokens
        """
tokens = word_tokenize(text.lower())
return [self.lemmatizer.lemmatize(token) for token in tokens
if token.isalpha() and token not in self.stopwords]

    def train(self, X, y, grid_search=False):
        """
        Train the intent classifier on labeled command texts.

        Args:
            X (list): List of command texts
            y (list): List of corresponding intent labels
            grid_search (bool): Whether to perform grid search for
                hyperparameter tuning

        Returns:
            float: Model accuracy
        """
# Create pipeline
pipeline = Pipeline([
('vectorizer', TfidfVectorizer(tokenizer=self._tokenize)),
('classifier', RandomForestClassifier(n_estimators=100,
random_state=42))
])
if grid_search:
# Define parameter grid
param_grid = {
'vectorizer__max_features': [None, 1000, 5000],
'vectorizer__ngram_range': [(1, 1), (1, 2)],
'classifier__n_estimators': [50, 100, 200],
'classifier__max_depth': [None, 10, 20]
            }

            # Search the grid with cross-validation and keep the best estimator
            # (5-fold CV; adjust cv/n_jobs as needed)
            search = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1)
            search.fit(X, y)
            self.model = search.best_estimator_
        else:
            # Fit the pipeline with the default hyperparameters
            pipeline.fit(X, y)
            self.model = pipeline

        # Report accuracy on the training data
accuracy = self.model.score(X, y)
logging.info(f"Model trained with accuracy: {accuracy:.2f}")
return accuracy

    def classify(self, command):
        """
        Classify a command and return the most likely intent.

        Args:
            command (str): The command to classify

        Returns:
            tuple: (intent, confidence)
        """
if not self.model:
return "unknown", 0.0
# Predict probabilities
probs = self.model.predict_proba([command])[0]
        max_idx = np.argmax(probs)
        # Map the highest-probability class index back to its intent label
        return self.model.classes_[max_idx], float(probs[max_idx])

    def save_model(self, model_path):
        """
        Save the trained model to disk.

        Args:
            model_path (str): Path to save the model

        Returns:
            bool: True if successful, False otherwise
        """
if not self.model:
logging.error("No model to save")
return False
try:
            # Create the destination directory if the path includes one
            model_dir = os.path.dirname(model_path)
            if model_dir:
                os.makedirs(model_dir, exist_ok=True)
with open(model_path, 'wb') as f:
pickle.dump(self.model, f)
logging.info(f"Model saved to {model_path}")
return True
except Exception as e:
logging.error(f"Error saving model: {str(e)}")
return False

    def load_model(self, model_path):
        """
        Load a trained model from disk.

        Args:
            model_path (str): Path to the model file

        Returns:
            bool: True if successful, False otherwise
        """
try:
with open(model_path, 'rb') as f:
self.model = pickle.load(f)
logging.info(f"Model loaded from {model_path}")
return True
except Exception as e:
logging.error(f"Error loading model: {str(e)}")
return False
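

# Example usage: a minimal sketch of how the classifier might be trained and
# queried. The sample commands, intent labels, and the
# "models/command_classifier.pkl" path below are illustrative assumptions,
# not part of the original module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Tiny illustrative training set mapping commands to intents
    commands = [
        "what time is it",
        "set an alarm for seven",
        "play some jazz music",
        "pause the music",
    ]
    intents = ["get_time", "set_alarm", "play_music", "pause_music"]

    classifier = CommandClassifier()
    classifier.train(commands, intents)

    intent, confidence = classifier.classify("play a song")
    print(f"intent={intent} confidence={confidence:.2f}")

    # Persist the trained pipeline for later use
    classifier.save_model("models/command_classifier.pkl")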