AIML Assignment-2
AIML Assignment-2
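Bayes’ Theorem
$$P(A \mid B) = \frac{P(B \mid A)\, P(A)}{P(B)}$$
Where:
- $P(A \mid B)$ is the posterior probability of class $A$ (e.g. spam) given the observed features $B$ (the words in the e-mail),
- $P(B \mid A)$ is the likelihood of observing features $B$ when the class is $A$,
- $P(A)$ is the prior probability of class $A$,
- $P(B)$ is the overall probability of observing features $B$.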
Algorithm
Start
End
Code
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
)
# Read the e-mail corpus from disk and preview its first rows.
df = pd.read_csv("emails.csv")
print(df.head())

# Expose the 'spam' column under the name 'label' for the rest of the script.
df = df.rename(columns={'spam': 'label'})

# One panel per class: (label value, panel title, colour map).
# Label 1 selects the spam e-mails and label 0 the ham e-mails.
panels = (
    (1, "Most Common Words in Spam Emails", 'Reds'),
    (0, "Most Common Words in Ham Emails", 'Blues'),
)

plt.figure(figsize=(12, 6))
for position, (label_value, heading, cmap) in enumerate(panels, start=1):
    # Concatenate every e-mail of this class into one string for the cloud.
    corpus = ' '.join(df.loc[df['label'] == label_value, 'text'])
    cloud = WordCloud(
        width=400,
        height=300,
        background_color='black',
        colormap=cmap,
    ).generate(corpus)

    plt.subplot(1, 2, position)
    plt.title(heading)
    plt.imshow(cloud, interpolation='bilinear')
    plt.axis("off")
plt.show()
# Features are the raw e-mail text; the target is the 'label' column
# (1 = spam, 0 = ham, the same coding the word clouds above rely on).
X = df['text']
y = df['label']

# Split data
# The raw text is split *before* vectorizing so that the vocabulary is learned
# from the training portion only; fitting on every e-mail would leak test-set
# words into the model.
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Bag-of-words features: learn the vocabulary on the training text, then
# reuse that same vocabulary for the held-out text.
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Train the Naive Bayes classifier on the word counts.
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

# Predictions
y_pred = classifier.predict(X_test)

# Evaluation
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=True)

# Plot accuracy next to the support-weighted precision / recall / F1.
# The 'weighted avg' entry is used because it is present in the report
# regardless of how the class labels are spelled.
weighted = report['weighted avg']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
values = [
    accuracy,
    weighted['precision'],
    weighted['recall'],
    weighted['f1-score'],
]

plt.figure(figsize=(8, 5))
sns.barplot(x=metrics, y=values, palette="coolwarm")
plt.ylim(0, 1)  # every plotted score lies in [0, 1]
plt.ylabel("Score")
plt.title("Model Performance Metrics")
plt.show()

# Sample Prediction
# The sample must go through the already-fitted vectorizer so its columns
# line up with the features the classifier was trained on.
sample_email = ["Congratulations! You've won a free car. Claim now!"]
sample_vector = vectorizer.transform(sample_email)
prediction = classifier.predict(sample_vector)
print("Sample Email Prediction:", "Spam" if prediction[0] == 1 else "Ham")
text spam
0 Subject: naturally irresistible your corporate... 1
1 Subject: the stock trading gunslinger fanny i... 1
2 Subject: unbelievable new homes made easy im ... 1
3 Subject: 4 color printing special request add... 1
4 Subject: do not have money , get software cds ... 1
Sample Email Prediction: Spam