# Week 12 Assignment
# Read the loan dataset from a local CSV file into a DataFrame.
# NOTE(review): the path is machine-specific (one user's Downloads folder).
loan_csv_path = "C:\\Users\\bharg\\Downloads\\loan.csv"
df = pd.read_csv(loan_csv_path)
# 80/20 train/test split of the encoded features and the target; the fixed
# random_state makes the partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)
# Bar chart of mi_scores: score values on the x-axis, index labels on the
# y-axis, drawn on an explicit 10x6 inch Axes.
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=mi_scores.values, y=mi_scores.index, palette='viridis', ax=ax)
ax.set_title('Mutual Information Scores for Features')
ax.set_xlabel('Mutual Information Score')
ax.set_ylabel('Features')
plt.show()
# 80/20 train/test split of the encoded features and the target; the fixed
# random_state makes the partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)
# Mutual-information-based feature selection: score each training feature
# against the target and keep the k_features highest-scoring ones.
k_features = 3  # number of features to select; choose a value suited to the data
selector = SelectKBest(score_func=mutual_info_regression, k=k_features)
# The selector is fitted on the training split only; X_selected contains
# just the selected columns of X_train.
X_selected = selector.fit_transform(X_train, y_train)
# Bar chart of mi_scores: score values on the x-axis, index labels on the
# y-axis, drawn on an explicit 10x6 inch Axes.
_, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=mi_scores.values, y=mi_scores.index, palette='viridis', ax=ax)
ax.set_title('Mutual Information Scores for Features')
ax.set_xlabel('Mutual Information Score')
ax.set_ylabel('Features')
plt.show()
# Two-step preprocessing pipeline: fill missing entries with each column's
# most frequent value, then one-hot encode (categories unseen during fit are
# ignored at transform time rather than raising).
categorical_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('cat', OneHotEncoder(handle_unknown='ignore')),
    ]
)
# 80/20 train/test split of the preprocessed features and the target; the
# fixed random_state makes the partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_preprocessed,
    y,
    test_size=0.2,
    random_state=42,
)
# Handle null values (you may need to customize this for your dataset):
# every missing entry in X is replaced with 0, in place, for simplicity.
X.fillna(0, inplace=True)
# Split the dataset into training and testing sets (80% training, 20% testing);
# the fixed random_state makes the partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)