Naive Bayes

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the CSV file
file_path = '../ML Prac/LP-1 Dataset/Decision Tree & Naive Bayes Classification/Social_Network_Ads.csv'
df = pd.read_csv(file_path)

# Preprocess the data
# Encode 'Gender' column (since it's categorical)
label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])

# Separate features (X) and labels (y)
X = df[['Gender', 'Age', 'EstimatedSalary']]  # Features
y = df['Purchased']  # Labels

# Scale the features for better model performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the Naive Bayes classifier (GaussianNB for numerical data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = nb_classifier.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

# Print the results
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)
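
# Optional check (not part of the original script): a quick 5-fold
# cross-validation gives a less split-dependent accuracy estimate than a
# single train/test split. This is a hedged sketch using sklearn's
# cross_val_score; cv=5 is an assumed choice.
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(GaussianNB(), X_scaled, y, cv=5)
print(f'5-fold CV accuracy: {cv_scores.mean():.3f} (+/- {cv_scores.std():.3f})')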

# Take input from the user for prediction (for demonstration purposes)
# Get gender as 'Male' or 'Female', and age and salary
user_gender = input("Enter Gender (Male/Female): ")
user_age = float(input("Enter Age: "))
user_salary = float(input("Enter Estimated Salary: "))

# Encode gender (Male=1, Female=0)
user_gender_encoded = label_encoder.transform([user_gender])[0]

# Scale the user's input
user_input = scaler.transform([[user_gender_encoded, user_age, user_salary]])

# Predict whether the user will purchase
user_prediction = nb_classifier.predict(user_input)

# Print the prediction result
print(f"Prediction: {'Purchased' if user_prediction[0] == 1 else 'Not Purchased'}")

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the CSV file, skipping the initial lines with descriptions
file_path = '../ML Prac/LP-1 Dataset/Decision Tree & Naive Bayes Classification/pima-indians-diabetes.csv'
df = pd.read_csv(file_path, header=None, skiprows=9)

# Assign column names based on dataset attributes
df.columns = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
    'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'
]

# Strip any leading/trailing spaces from the column names
df.columns = df.columns.str.strip()

# Check for missing values and handle them (drop rows with missing values for simplicity)
df = df.dropna()
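
# Note (not part of the original script): dropna() only removes true NaN
# values. In the Pima Indians Diabetes data, zeros in columns such as
# Glucose or BloodPressure are often treated as missing readings. A hedged
# sketch of one way to drop such rows as well (the column list is an
# assumption) is left commented out so the original behaviour is unchanged:
# zero_as_missing = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
# df[zero_as_missing] = df[zero_as_missing].replace(0, float('nan'))
# df = df.dropna()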

# Separate features (X) and labels (y)
X = df[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
        'BMI', 'DiabetesPedigreeFunction', 'Age']]  # Features
y = df['Outcome']  # Labels

# Scale the features for better model performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the Naive Bayes classifier (GaussianNB for numerical data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = nb_classifier.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

# Print the results
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)
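
# Optional addition (not part of the original script): a confusion matrix
# shows where diabetic and non-diabetic cases are confused, which the
# accuracy figure alone hides. A minimal sketch using sklearn:
from sklearn.metrics import confusion_matrix
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))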

# Take input from the user for prediction (for demonstration purposes)
user_pregnancies = int(input("Enter Number of Pregnancies: "))
user_glucose = int(input("Enter Plasma Glucose Concentration (2 hours in OGTT): "))
user_blood_pressure = int(input("Enter Diastolic Blood Pressure (mm Hg): "))
user_skin_thickness = int(input("Enter Triceps Skin Fold Thickness (mm): "))
user_insulin = int(input("Enter 2-Hour Serum Insulin (mu U/ml): "))
user_bmi = float(input("Enter Body Mass Index (BMI): "))
user_diabetes_pedigree = float(input("Enter Diabetes Pedigree Function: "))
user_age = int(input("Enter Age (years): "))

# Create a list for the user input
user_input = [[user_pregnancies, user_glucose, user_blood_pressure, user_skin_thickness,
               user_insulin, user_bmi, user_diabetes_pedigree, user_age]]

# Scale the user's input
user_input_scaled = scaler.transform(user_input)

# Predict whether the user has diabetes
user_prediction = nb_classifier.predict(user_input_scaled)

# Print the prediction result
print(f"Prediction: {'Diabetic' if user_prediction[0] == 1 else 'Not Diabetic'}")

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

# Load the CSV file
# Provide the correct path to your NaiveBayes.csv file
file_path = '../ML Prac/LP-1 Dataset/Decision Tree & Naive Bayes Classification/NaiveBayes.csv'
df = pd.read_csv(file_path)

# Strip any leading/trailing spaces from the column names
df.columns = df.columns.str.strip()

# Separate features (X) and labels (y)
X = df[['Age', 'Salary']]  # Features: Age and Salary
y = df['Purchased']  # Labels: Purchased

# Scale the features for better model performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the Naive Bayes classifier (GaussianNB for numerical data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = nb_classifier.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

# Print the results
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)
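
# Optional extension (not part of the original script): predicting for a
# single new Age/Salary pair, mirroring the user-input step in the earlier
# scripts. The example values below are assumptions for illustration only.
new_customer = scaler.transform([[30, 87000]])  # [Age, Salary]
new_prediction = nb_classifier.predict(new_customer)
print(f"Prediction for Age=30, Salary=87000: "
      f"{'Purchased' if new_prediction[0] == 1 else 'Not Purchased'}")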
