0% found this document useful (0 votes)
24 views16 pages

ABHAYMLFILE

Ml file

Uploaded by

ranabeena804
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views16 pages

ABHAYMLFILE

Ml file

Uploaded by

ranabeena804
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 16

Name: Abhay Chand Ramola

Course: BCA(6) Sec: A


Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a Python program to implement logistic regression on the California_housing dataset.
Source code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the dataset


# Read the housing data from the hard-coded CSV path
df = pd.read_csv('/content/sample_data/california_housing_train.csv')

# Discard incomplete rows so every remaining sample has all of its columns
df.dropna(inplace=True)

# Turn the continuous price into a binary label:
# 1 when the price is strictly above the median, 0 otherwise
median_value = df['median_house_value'].median()
df['value_category'] = df['median_house_value'].gt(median_value).astype(int)

# The label is the derived category; the features are every other column
# except the raw price the label was computed from
y = df['value_category']
X = df.drop(columns=['median_house_value', 'value_category'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scaler used to standardise the features
scaler = StandardScaler()
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Fit the scaler on the training split only, then apply the same
# transformation to the test split
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the logistic regression classifier with default settings and train it
# (fit() returns the estimator itself, so the two steps can be chained)
model = LogisticRegression().fit(X_train_scaled, y_train)

# Predicted labels for the held-out rows
y_pred = model.predict(X_test_scaled)

# Compare the predictions with the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)

print(f"Accuracy:, {accuracy}")
print(f"Confusion matrix:\n{conf_matrix} ")

Output:
Accuracy:, 0.8370588235294117
Confusion matrix:
[[1397 259]
[ 295 1449]]
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a Python program to implement the ID3 algorithm using entropy in a decision tree.
Source Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix

#Load the dataset


# Load the dataset
df = pd.read_csv('/content/sample_data/california_housing_train.csv')

# Data preprocessing: drop any rows with missing values
df.dropna(inplace=True)

# A decision-tree *classifier* needs discrete class labels. 'median_house_value'
# is a continuous price, so using it directly as the label turns every distinct
# price into its own class. Binarise it around the median instead
# (1 = strictly above the median, 0 = otherwise), the same labelling the
# logistic-regression program in this file uses.
median_value = df['median_house_value'].median()

# Features: every column except the price the label is derived from
X = df.drop('median_house_value', axis=1)
# Target: binary price category
y = (df['median_house_value'] > median_value).astype(int)

# Split into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training split only, reuse on the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Decision tree model. criterion='entropy' chooses splits by information gain,
# the measure ID3 is based on.
# NOTE(review): scikit-learn documents its trees as an optimised CART
# implementation, so this approximates ID3 rather than reproducing it exactly.
model = DecisionTreeClassifier(criterion='entropy')

# Training the model
model.fit(X_train_scaled, y_train)

# Predictions on the testing set
y_pred = model.predict(X_test_scaled)

# Model evaluation
accuracy = accuracy_score(y_test, y_pred)
# The confusion matrix must be computed from this model's own predictions;
# without this line `conf_matrix` is undefined in this program (or, in a shared
# notebook session, a stale value left over from an earlier program).
conf_matrix = confusion_matrix(y_test, y_pred)
print("Accuracy: ", accuracy)
print("Confusion matrix : \n", conf_matrix)

Output:
Accuracy: 0.025588235294117648
Confusion matrix :
[[1397 259]
[ 295 1449]]
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a python program to implement CART algorithm for decision tree.
Source Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,confusion_matrix

#Load the dataset


# Load the dataset
df = pd.read_csv('/content/sample_data/california_housing_train.csv')

# Data preprocessing: drop any rows with missing values
df.dropna(inplace=True)

# A decision-tree *classifier* needs discrete class labels. 'median_house_value'
# is a continuous price, so using it directly as the label turns every distinct
# price into its own class. Binarise it around the median instead
# (1 = strictly above the median, 0 = otherwise), the same labelling the
# logistic-regression program in this file uses.
median_value = df['median_house_value'].median()

# Features: every column except the price the label is derived from
X = df.drop('median_house_value', axis=1)
# Target: binary price category
y = (df['median_house_value'] > median_value).astype(int)

# Split into training and testing sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: fit on the training split only, reuse on the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# CART (decision tree) model. The original program never constructed `model`,
# so fit() either failed with NameError or silently reused a model left over
# from an earlier program. CART splits on Gini impurity, so request it explicitly.
model = DecisionTreeClassifier(criterion='gini')

# Training the model
model.fit(X_train_scaled, y_train)

# Predictions on the testing set
y_pred = model.predict(X_test_scaled)

# Model evaluation
accuracy = accuracy_score(y_test, y_pred)
# Compute the confusion matrix from this model's own predictions; it was
# previously printed without ever being calculated in this program.
conf_matrix = confusion_matrix(y_test, y_pred)
print("Accuracy: ", accuracy)
print("Confusion matrix : \n", conf_matrix)

Output:
Accuracy: 0.023823529411764705
Confusion matrix :
[[1397 259]
[ 295 1449]]
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a python program to implement SVM using linear kernel on iris.csv.
Source Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report,accuracy_score

# Location of the Iris data file on the UCI repository. The previous value had
# a proxy host and a mangled scheme spliced into it and did not point at UCI.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# The file is read with header=None, so the column names are supplied here
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris = pd.read_csv(url, header=None, names=column_names)

# Quick look at the first rows to confirm the file parsed as expected
print(iris.head())

X = iris.iloc[:, :-1].values  # all columns except the last one (the measurements)
y = iris.iloc[:, -1].values  # the last column (the species label)

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardise the features: fit on the training split only, reuse on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Support vector classifier with a linear kernel
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Overall fraction of test rows classified correctly, shown to two decimals
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy:{accuracy:.2f}")

# Per-class precision, recall, F1 and support
report = classification_report(y_test, y_pred)
print(report)

Output:
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
Accuracy:0.98
precision recall f1-score support

Iris-setosa 1.00 1.00 1.00 19


Iris-versicolor 1.00 0.92 0.96 13
Iris-virginica 0.93 1.00 0.96 13

accuracy 0.98 45
macro avg 0.98 0.97 0.97 45
weighted avg 0.98 0.98 0.98 45
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a Python program to carry out visualization for each feature separately.
Source Code:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris

# Load the Iris dataset


# Load the Iris dataset bundled with scikit-learn
iris = load_iris()
X = iris.data
y = iris.target
feature_names = iris.feature_names
target_names = iris.target_names

# Plot one histogram (with a KDE curve) per feature on a two-column grid.
# The row count is derived from the number of features instead of being fixed.
n_features = X.shape[1]
n_rows = (n_features + 1) // 2
plt.figure(figsize=(12, 6))
for position, name in enumerate(feature_names, start=1):
    plt.subplot(n_rows, 2, position)
    sns.histplot(X[:, position - 1], kde=True, color='skyblue')
    plt.title(name)
# Adjust spacing and display once, after every subplot has been drawn
plt.tight_layout()
plt.show()

# Load Iris as a DataFrame for the pairplot.
# NOTE(review): seaborn's load_dataset downloads the data on first use, so this
# line needs network access - confirm for the target environment.
iris_df = sns.load_dataset('iris')

# Colour the pairwise scatter plots by the 'species' column
sns.pairplot(iris_df, hue='species')
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning
# Display the figure that is currently pending
plt.show()

# PCA Visualization
from sklearn.decomposition import PCA

# Project the feature matrix onto its first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

# Scatter the two components against each other, coloured by class label
plt.figure(figsize=(8, 6))
pca_axes = sns.scatterplot(
    x=X_pca[:, 0],
    y=X_pca[:, 1],
    hue=y,
    palette='viridis',
    legend='full',
)
pca_axes.set_title('PCA Visualization')
pca_axes.set_xlabel('Principal Component 1')
pca_axes.set_ylabel('Principal Component 2')
plt.show()
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Output:
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

Problem Statement: Write a program to perform data analysis using supervised algorithms, building a predictive
model for customer churn in a subscription-based business.
Source Code:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Step 1: Data Generation


def generate_customer_churn_data(num_customers=1000, start_date='2019-01-01', end_date='2022-01-01'):
    """Generate a synthetic customer table for the churn example.

    Each customer gets a join date drawn uniformly from the daily range
    ``start_date``..``end_date`` (both inclusive) and a churn date 30 to 364
    days after joining. A customer is labelled 'Churned' when the churn date
    falls on or before ``end_date`` and 'Active' otherwise.

    Randomness comes from NumPy's global generator, so call
    ``np.random.seed(...)`` beforehand for reproducible output.

    Args:
        num_customers: Number of rows to generate.
        start_date: First possible join date (anything ``pd.to_datetime`` accepts).
        end_date: Last possible join date and the churn cut-off.

    Returns:
        DataFrame with columns 'CustomerID', 'JoinDate', 'ChurnDate' and
        'ChurnStatus'.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    customer_ids = np.arange(1, num_customers + 1)

    # Build the pool of candidate join dates once, rather than once per customer,
    # and draw all join dates in a single vectorised call.
    date_pool = pd.date_range(start_date, end_date)
    join_positions = np.random.randint(0, len(date_pool), size=num_customers)
    join_dates = pd.Series(date_pool[join_positions])

    # randint's upper bound is exclusive, so the tenure is 30..364 days
    tenure_days = np.random.randint(30, 365, size=num_customers)
    churn_dates = join_dates + pd.to_timedelta(tenure_days, unit='D')

    churn_status = np.where(churn_dates <= end_date, 'Churned', 'Active')

    data = {
        'CustomerID': customer_ids,
        'JoinDate': join_dates,
        'ChurnDate': churn_dates,
        'ChurnStatus': churn_status,
    }
    return pd.DataFrame(data)

# Step 2: Data Preprocessing


def preprocess_data(df):
    """Turn the raw customer table into numeric model inputs.

    Adds calendar features derived from 'JoinDate' and the tenure in days,
    removes the two raw date columns, and encodes 'ChurnStatus' as an integer
    ('Active' -> 0, 'Churned' -> 1).

    The frame is modified in place and the same object is returned.

    Args:
        df: DataFrame with datetime columns 'JoinDate' and 'ChurnDate' and a
            'ChurnStatus' column.

    Returns:
        The same DataFrame, with the derived columns added and the raw date
        columns dropped.
    """
    join_date = df['JoinDate']

    # Calendar components of the join date
    df['JoinYear'] = join_date.dt.year
    df['JoinMonth'] = join_date.dt.month
    df['JoinDay'] = join_date.dt.day
    df['JoinDayOfWeek'] = join_date.dt.dayofweek

    # Whole days between joining and churning
    df['DaysToChurn'] = (df['ChurnDate'] - join_date).dt.days

    # The raw dates are no longer needed once the features above exist
    df.drop(['JoinDate', 'ChurnDate'], axis=1, inplace=True)

    # Any status other than these two labels maps to NaN
    df['ChurnStatus'] = df['ChurnStatus'].map({'Active': 0, 'Churned': 1})

    return df

# Step 3: Split Data


def split_data(df, test_size=0.2, random_state=42):
    """Split the preprocessed table into train/test features and labels.

    Args:
        df: DataFrame containing a 'ChurnStatus' label column; every other
            column is used as a feature.
        test_size: Fraction of rows held out for testing.
        random_state: Seed passed to ``train_test_split`` so the split is
            repeatable.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    X = df.drop('ChurnStatus', axis=1)
    y = df['ChurnStatus']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

# Step 4: Model Training


def train_model(X_train, y_train, n_estimators=100, random_state=42):
    """Fit a random-forest classifier on the training data.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        n_estimators: Number of trees in the forest.
        random_state: Seed that makes the fitted forest repeatable.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
    model.fit(X_train, y_train)
    return model

# Step 5: Model Evaluation


def evaluate_model(model, X_test, y_test):
    """Print accuracy, a classification report and the confusion matrix.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Held-out feature matrix.
        y_test: True labels for ``X_test``.

    Returns:
        None; the metrics are written to standard output.
    """
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
# Step 6: Model Deployment
def save_model(model, filepath='customer_churn_model.pkl'):
    """Serialise the fitted model to disk with joblib.

    Args:
        model: Fitted estimator to persist.
        filepath: Destination file; defaults to 'customer_churn_model.pkl' in
            the current working directory.
    """
    joblib.dump(model, filepath)
    print("Model saved successfully.")

def main():
    """Run the churn pipeline end to end: generate, preprocess, split, train, evaluate, save."""
    # Step 1: Generate data
    df = generate_customer_churn_data()

    # Step 2: Preprocess data
    df = preprocess_data(df)

    # Step 3: Split data
    X_train, X_test, y_train, y_test = split_data(df)

    # Step 4: Train model
    model = train_model(X_train, y_train)

    # Step 5: Evaluate model
    evaluate_model(model, X_test, y_test)

    # Step 6: Save model
    save_model(model)


# Run the pipeline only when executed as a script (or notebook cell),
# not when this module is imported.
if __name__ == "__main__":
    main()

Output:
Accuracy: 0.975

Classification Report:
precision recall f1-score support
0 1.00 0.85 0.92 33
1 0.97 1.00 0.99 167

accuracy 0.97 200


macro avg 0.99 0.92 0.95 200
weighted avg 0.98 0.97 0.97 200

Confusion Matrix:
[[ 28 5]
[ 0 167]]
Model saved successfully.
Name: Abhay Chand Ramola
Course: BCA(6) Sec: A
Roll No: 2121020(05)
Subject: Fundamental of Machine Learning

You might also like