import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
# --- Social Network Ads: predict 'Purchased' from gender, age and salary ---

# Load the CSV file. The path is written as adjacent string literals inside
# parentheses so that it stays one logical string; a quoted string may not
# simply continue onto the next physical line.
file_path = (
    '../ML Prac/LP-1 Dataset/Decision Tree & Naive Bayes Classification/'
    'Social_Network_Ads.csv'
)
df = pd.read_csv(file_path)

# Encode the categorical 'Gender' column as integers. LabelEncoder numbers the
# labels in sorted order (so Female=0, Male=1 if those are the two labels).
label_encoder = LabelEncoder()
df['Gender'] = label_encoder.fit_transform(df['Gender'])

# Separate features (X) and labels (y)
feature_columns = ['Gender', 'Age', 'EstimatedSalary']
X = df[feature_columns]
y = df['Purchased']

# Split BEFORE scaling so that the held-out rows do not influence the
# mean/variance learned by the scaler (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both subsets.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the Naive Bayes classifier (GaussianNB for numerical data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Predict on the test set and evaluate the model's performance
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

# Print the results
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

# Take input from the user for prediction (for demonstration purposes).
# The gender is matched against the labels the encoder actually saw, first
# exactly and then case-insensitively, so that input such as "male" or
# " Male " is accepted instead of failing with an "unseen label" error.
known_genders = list(label_encoder.classes_)
genders_by_casefold = {str(label).casefold(): label for label in known_genders}
entered_gender = input("Enter Gender (Male/Female): ").strip()
if entered_gender in known_genders:
    user_gender = entered_gender
else:
    user_gender = genders_by_casefold.get(entered_gender.casefold())
if user_gender is None:
    raise ValueError(
        f"Unknown gender {entered_gender!r}; expected one of {known_genders}"
    )
user_age = float(input("Enter Age: "))
user_salary = float(input("Enter Estimated Salary: "))

# Encode the gender with the same encoder that was fitted on the dataset
user_gender_encoded = label_encoder.transform([user_gender])[0]

# Scale the user's input. The row is wrapped in a DataFrame carrying the
# training column names because the scaler was fitted on a DataFrame; a bare
# list would make scikit-learn warn about missing feature names.
user_row = pd.DataFrame(
    [[user_gender_encoded, user_age, user_salary]], columns=feature_columns
)
user_input = scaler.transform(user_row)

# Predict whether the user will purchase and print the result
user_prediction = nb_classifier.predict(user_input)
print(f"Prediction: {'Purchased' if user_prediction[0] == 1 else 'Not Purchased'}")
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
# --- Pima Indians Diabetes: predict 'Outcome' from eight clinical features ---

# Load the CSV file. The path is written as adjacent string literals inside
# parentheses so that it stays one logical string; a quoted string may not
# simply continue onto the next physical line.
file_path = (
    '../ML Prac/LP-1 Dataset/Decision Tree & Naive Bayes Classification/'
    'pima-indians-diabetes.csv'
)
# The file is read without a header row and its first 9 lines are skipped
# (presumably descriptive text that precedes the data -- confirm against the
# actual file).
df = pd.read_csv(file_path, header=None, skiprows=9)

# Assign column names based on dataset attributes. The feature list is kept
# in one place so the column assignment, the feature selection and the user
# input row below cannot drift apart.
feature_columns = [
    'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
    'BMI', 'DiabetesPedigreeFunction', 'Age',
]
df.columns = feature_columns + ['Outcome']

# Handle missing values (drop rows with missing values for simplicity)
df = df.dropna()

# Separate features (X) and labels (y)
X = df[feature_columns]
y = df['Outcome']

# Split BEFORE scaling so that the held-out rows do not influence the
# mean/variance learned by the scaler (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both subsets.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the Naive Bayes classifier (GaussianNB for numerical data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Predict on the test set and evaluate the model's performance
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

# Print the results
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

# Take input from the user for prediction (for demonstration purposes)
user_pregnancies = int(input("Enter Number of Pregnancies: "))
user_glucose = int(input("Enter Plasma Glucose Concentration (2 hours in OGTT): "))
user_blood_pressure = int(input("Enter Diastolic Blood Pressure (mm Hg): "))
user_skin_thickness = int(input("Enter Triceps Skin Fold Thickness (mm): "))
user_insulin = int(input("Enter 2-Hour Serum Insulin (mu U/ml): "))
user_bmi = float(input("Enter Body Mass Index (BMI): "))
user_diabetes_pedigree = float(input("Enter Diabetes Pedigree Function: "))
user_age = int(input("Enter Age (years): "))

# Create a one-row table for the user input, in the same column order as the
# training features. A DataFrame carrying the training column names is used
# because the scaler was fitted on a DataFrame; a bare list would make
# scikit-learn warn about missing feature names.
user_input = pd.DataFrame(
    [[
        user_pregnancies, user_glucose, user_blood_pressure, user_skin_thickness,
        user_insulin, user_bmi, user_diabetes_pedigree, user_age,
    ]],
    columns=feature_columns,
)

# Scale the user's input with the scaler fitted on the training data
user_input_scaled = scaler.transform(user_input)

# Predict whether the user has diabetes and print the result
user_prediction = nb_classifier.predict(user_input_scaled)
print(f"Prediction: {'Diabetic' if user_prediction[0] == 1 else 'Not Diabetic'}")
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
# --- NaiveBayes.csv: predict 'Purchased' from age and salary ---

# Load the CSV file (adjust the path if NaiveBayes.csv lives elsewhere)
file_path = '../ML Prac/LP-1 Dataset/Decision Tree & Naive Bayes Classification/NaiveBayes.csv'
df = pd.read_csv(file_path)

# Strip any leading/trailing spaces from the column names so the lookups
# below work even if the header row is padded
df.columns = df.columns.str.strip()

# Separate features (X) and labels (y)
X = df[['Age', 'Salary']]  # Features: Age and Salary
y = df['Purchased']  # Labels: Purchased

# Split BEFORE scaling so that the held-out rows do not influence the
# mean/variance learned by the scaler (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both subsets.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the Naive Bayes classifier (GaussianNB for numerical data)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)

# Predict on the test set and evaluate the model's performance
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1)

# Print the results
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)