Week 12 Assignment

Part 1: Mutual Information Classification
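Before running the selector, it helps to recall what it scores. The mutual information between a feature X and the target Y measures how much knowing X reduces uncertainty about Y; for discrete variables,

    I(X; Y) = \sum_{x,y} p(x, y) \log \frac{p(x, y)}{p(x)\, p(y)}

A score of 0 means the feature is independent of the target, and larger scores indicate a stronger (possibly non-linear) relationship. scikit-learn's mutual_info_classif estimates this quantity from the data (using a nearest-neighbour estimator when features are continuous), and SelectKBest keeps the K highest-scoring features.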

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns

# Load the loan dataset
df = pd.read_csv("C:\\Users\\bharg\\Downloads\\loan.csv")

# Display the first few rows of the dataset
print(df.head())

# Separate features (X) and the target variable (y)
target_column = 'Loan_Status'
X = df.drop(columns=[target_column])
y = df[target_column]

# Apply label encoding to handle categorical variables
label_encoder = LabelEncoder()
X_encoded = X.apply(label_encoder.fit_transform)

# Split the dataset into training and testing sets (80:20 ratio)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Perform Mutual Information Classification-based feature selection
k_features = 5  # Choose an appropriate value of K (number of features to select)
selector = SelectKBest(score_func=mutual_info_classif, k=k_features)
X_selected = selector.fit_transform(X_train, y_train)

# Get the indices of the selected features
selected_feature_indices = selector.get_support(indices=True)

# Print the names or indices of the selected features
selected_feature_names = X_train.columns[selected_feature_indices]
print(f'Selected features: {selected_feature_names}')

# Bonus Task: Visualize mutual information scores for each feature
mi_scores = pd.Series(selector.scores_, index=X_train.columns)
mi_scores = mi_scores.sort_values(ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x=mi_scores.values, y=mi_scores.index, palette='viridis')
plt.title('Mutual Information Scores for Features')
plt.xlabel('Mutual Information Score')
plt.ylabel('Features')
plt.show()

    Loan_ID Gender Married Dependents     Education Self_Employed  \
0  LP001002   Male      No          0      Graduate            No
1  LP001003   Male     Yes          1      Graduate            No
2  LP001005   Male     Yes          0      Graduate           Yes
3  LP001006   Male     Yes          0  Not Graduate            No
4  LP001008   Male      No          0      Graduate            No

   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0             5849                0.0         NaN             360.0
1             4583             1508.0       128.0             360.0
2             3000                0.0        66.0             360.0
3             2583             2358.0       120.0             360.0
4             6000                0.0       141.0             360.0

   Credit_History Property_Area Loan_Status
0             1.0         Urban           Y
1             1.0         Rural           N
2             1.0         Urban           Y
3             1.0         Urban           Y
4             1.0         Urban           Y

Selected features: Index(['Dependents', 'ApplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History'],
      dtype='object')
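As a quick follow-up (not part of the original output), the fitted selector can be applied to the held-out split and the five selected features fed to a simple classifier. This is a minimal sketch that assumes selector, X_selected, X_test, y_train, and y_test from the code above are still in scope; LogisticRegression is just one reasonable model choice:

# Sketch: evaluate the selected features on the held-out split
# (selector, X_selected, X_test, y_train, y_test come from the code above)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

X_test_selected = selector.transform(X_test)   # same 5 columns as X_selected
clf = LogisticRegression(max_iter=1000).fit(X_selected, y_train)
print('Accuracy on selected features:', accuracy_score(y_test, clf.predict(X_test_selected)))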
Part 2: Mutual Information Regression
pip install pandas scikit-learn

Requirement already satisfied: pandas in c:\users\bharg\anaconda3\lib\site-packages (2.0.3)
Requirement already satisfied: scikit-learn in c:\users\bharg\anaconda3\lib\site-packages (1.3.0)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\users\bharg\anaconda3\lib\site-packages (from pandas) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in c:\users\bharg\anaconda3\lib\site-packages (from pandas) (2023.3.post1)
Requirement already satisfied: tzdata>=2022.1 in c:\users\bharg\anaconda3\lib\site-packages (from pandas) (2023.3)
Requirement already satisfied: numpy>=1.21.0 in c:\users\bharg\anaconda3\lib\site-packages (from pandas) (1.24.3)
Requirement already satisfied: scipy>=1.5.0 in c:\users\bharg\anaconda3\lib\site-packages (from scikit-learn) (1.11.1)
Requirement already satisfied: joblib>=1.1.1 in c:\users\bharg\anaconda3\lib\site-packages (from scikit-learn) (1.2.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in c:\users\bharg\anaconda3\lib\site-packages (from scikit-learn) (2.2.0)
Requirement already satisfied: six>=1.5 in c:\users\bharg\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)
Note: you may need to restart the kernel to use updated packages.

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, mutual_info_regression
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Housing dataset
df = pd.read_csv('C:\\Users\\bharg\\Downloads\\housing.csv')

# Separate features (X) and the target variable (y)
target_column = 'SalePrice'
X = df.drop(columns=[target_column])
y = df[target_column]

# Apply label encoding to handle categorical variables
label_encoder = LabelEncoder()
X_encoded = X.apply(label_encoder.fit_transform)

# Split the dataset into training and testing sets (80:20 ratio)
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Perform Mutual Information Regression-based feature selection
k_features = 3  # Choose an appropriate value of K (number of features to select)
selector = SelectKBest(score_func=mutual_info_regression, k=k_features)
X_selected = selector.fit_transform(X_train, y_train)

# Get the indices of the selected features
selected_feature_indices = selector.get_support(indices=True)

# Print the names or indices of the selected features
selected_feature_names = X_train.columns[selected_feature_indices]
print(f'Selected features: {selected_feature_names}')

# Bonus Task: Visualize mutual information scores for each feature
mi_scores = pd.Series(selector.scores_, index=X_train.columns)
mi_scores = mi_scores.sort_values(ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x=mi_scores.values, y=mi_scores.index, palette='viridis')
plt.title('Mutual Information Scores for Features')
plt.xlabel('Mutual Information Score')
plt.ylabel('Features')
plt.show()

Selected features: Index(['OverallQual', 'GrLivArea', 'GarageCars'], dtype='object')
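The same check works on the regression side: transform the test split with the fitted selector and see how much of SalePrice the three chosen features explain on their own. A minimal sketch, assuming selector, X_selected, X_test, y_train, and y_test from the Part 2 code are still in scope:

# Sketch: R^2 from just the 3 selected features on the held-out split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

X_test_selected = selector.transform(X_test)   # OverallQual, GrLivArea, GarageCars
reg = LinearRegression().fit(X_selected, y_train)
print('R^2 with 3 features:', r2_score(y_test, reg.predict(X_test_selected)))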
Part 3: Linear Regression on the Housing Dataset
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Load the Housing dataset
df = pd.read_csv('C:\\Users\\bharg\\Downloads\\housing.csv')

# Separate features (X) and the target variable (y)
target_column = 'SalePrice'
X = df.drop(columns=[target_column])
y = df[target_column]

# Identify numerical and categorical features
numerical_features = X.select_dtypes(include=['int64', 'float64']).columns
categorical_features = X.select_dtypes(include=['object']).columns

# Create preprocessing pipelines for numerical and categorical features
numerical_pipeline = Pipeline([('imputer', SimpleImputer(strategy='mean')),
                               ('num', 'passthrough')])

categorical_pipeline = Pipeline([('imputer', SimpleImputer(strategy='most_frequent')),
                                 ('cat', OneHotEncoder(handle_unknown='ignore'))])

# Combine the preprocessing pipelines using ColumnTransformer
preprocessor = ColumnTransformer(transformers=[('num', numerical_pipeline, numerical_features),
                                               ('cat', categorical_pipeline, categorical_features)])

# Apply preprocessing on the entire dataset
X_preprocessed = preprocessor.fit_transform(X)

# Split the dataset into training and testing sets (80:20 ratio)
X_train, X_test, y_train, y_test = train_test_split(X_preprocessed, y, test_size=0.2, random_state=42)

# Initialize the Linear Regression model
linear_reg_model = LinearRegression()

# Create the final pipeline with feature preprocessing and model training
pipeline = Pipeline([('regressor', linear_reg_model)])

# Fit the model to the training data
pipeline.fit(X_train, y_train)

# Predict house prices for the testing data
y_pred = pipeline.predict(X_test)

# Evaluate the performance of the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the MSE and R^2 values
print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R^2): {r2}')

# Plot a scatter plot between predicted and actual house prices
plt.scatter(y_test, y_pred)
plt.xlabel('Actual House Prices')
plt.ylabel('Predicted House Prices')
plt.title('Scatter Plot of Actual vs Predicted House Prices')
plt.show()

Mean Squared Error (MSE): 980359454.4892789
R-squared (R^2): 0.8721880363353154
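An MSE near 9.8e8 is hard to read on its own; its square root, roughly 31,300, says a typical prediction is off by about that many units of SalePrice. One caveat worth flagging: the preprocessing above is fit on the full dataset before the split, so the imputer and encoder see the test rows. Below is a minimal sketch of the leak-free variant, reusing X, y, preprocessor, and the imports already defined in this part (the split variable names X_tr/X_te/y_tr/y_te are new, chosen for illustration):

# Sketch: keep preprocessing inside the pipeline so it is fit on training data only
# (X, y, preprocessor, and the imports come from the cell above;
#  X_tr/X_te/y_tr/y_te are illustrative names, not from the original notebook)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)

full_pipeline = Pipeline([('preprocess', preprocessor),
                          ('regressor', LinearRegression())])
full_pipeline.fit(X_tr, y_tr)                  # imputer/encoder fit on X_tr only
print('R^2 without leakage:', r2_score(y_te, full_pipeline.predict(X_te)))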
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.preprocessing import LabelEncoder

# Load the dataset (assuming it's a CSV file)
loan_data = pd.read_csv("C:\\Users\\bharg\\Downloads\\loan.csv")

# Display the first few rows of the dataset
print("Original Dataset:")
print(loan_data.head())

# Separate features (X) and target variable (y)
X = loan_data.drop("Loan_Status", axis=1)
y = loan_data["Loan_Status"]

# Apply label encoding for categorical variables
label_encoder = LabelEncoder()
for column in X.select_dtypes(include=['object']).columns:
    X[column] = label_encoder.fit_transform(X[column])

# Handle null values (you may need to customize this based on your dataset)
X.fillna(0, inplace=True)  # Filling null values with 0 for simplicity

# Split the dataset into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Use SelectKBest with Mutual Information Classification
k_best = 5  # Choose an appropriate value of K
selector = SelectKBest(mutual_info_classif, k=k_best)
X_train_selected = selector.fit_transform(X_train, y_train)

# Get the selected feature indices
selected_indices = selector.get_support(indices=True)

# Get the selected feature names
selected_features = X.columns[selected_indices]

# Print the selected features
print(f"\nSelected Features (Top {k_best}):")
print(selected_features)
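Filling nulls with 0 keeps the code short, but for a column like LoanAmount a zero is not a plausible value and can pull the statistics down. A small alternative sketch using only pandas, applied to the encoded X from the block above (X_median is a new illustrative name):

# Sketch: median imputation instead of fillna(0)
# (X is the encoded feature frame from the block above; X_median is illustrative)
X_median = X.fillna(X.median(numeric_only=True))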
