0% found this document useful (0 votes)

5 views · 4 pages

Automatic Feature Selection

Uploaded by

bvinnuroiroi467

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

5 views · 4 pages

Automatic Feature Selection

Uploaded by

bvinnuroiroi467

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 4

Automatic Feature Selection

1. Univariate statistics:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectPercentile
from sklearn.model_selection import train_test_split

# Univariate feature selection demo: append synthetic noise columns to the
# breast cancer data, keep the top half of all columns with SelectPercentile,
# and display which columns survived.

cancer = load_breast_cancer()

# Seeded generator so the 50 noise columns are identical on every run.
rng = np.random.RandomState(42)
noise = rng.normal(size=(len(cancer.data), 50))

# Real features come first, followed by the 50 noise columns.
X_w_noise = np.hstack([cancer.data, noise])

# Half of the samples go to training, half to testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_w_noise, cancer.target, random_state=0, test_size=0.5
)

# Fit the selector on the training split only, keeping 50% of the columns.
select = SelectPercentile(percentile=50).fit(X_train, y_train)

# Reduce the training matrix to the selected columns.
X_train_selected = select.transform(X_train)

# Compare the shapes before and after selection.
print("X_train.shape: {}".format(X_train.shape))
print("X_train_selected.shape: {}".format(X_train_selected.shape))

# Boolean mask over all columns: True where the column was kept.
mask = select.get_support()
print(mask)

# Draw the mask as a single row -- black is True (selected), white is False.
plt.matshow(mask.reshape(1, -1), cmap='gray_r')
plt.xlabel("Feature index")
plt.yticks([])  # a one-row image has no meaningful y axis
plt.title("Feature Selection Mask")
plt.show()
2. Model-based selection:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Model-based feature selection demo: a random forest ranks the features,
# SelectFromModel keeps those passing the "median" importance threshold, and
# two classifiers are then scored on the reduced feature set.

# Load the breast cancer dataset
cancer = load_breast_cancer()

# Split the dataset into training and test sets (half each)
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0, test_size=0.5
)

# Initialize SelectFromModel with RandomForestClassifier
select = SelectFromModel(
    RandomForestClassifier(n_estimators=100, random_state=42),
    threshold="median",
)

# Fit the selector to the training data. One fit is enough: the forest is
# seeded, so refitting on the same data would reproduce the same selection.
select.fit(X_train, y_train)

# Transform the training and test sets
X_train_selected = select.transform(X_train)
X_test_selected = select.transform(X_test)

# The *_l1 names are kept because the printed labels below refer to them;
# they are the same arrays as the *_selected ones.
X_train_l1 = X_train_selected
X_test_l1 = X_test_selected

# Fit a RandomForest model on the selected features
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_selected, y_train)

print("X_train.shape: {}".format(X_train.shape))
print("X_train_l1.shape: {}".format(X_train_l1.shape))

# Print the accuracy on the test set with selected features
print("Score with selected features: {:.3f}".format(
    model.score(X_test_selected, y_test)))

# Visualize the mask -- black is True (selected), white is False.
# The mask has one entry per feature, so the axis is a feature index.
mask = select.get_support()
plt.matshow(mask.reshape(1, -1), cmap='gray_r')
plt.xlabel("Feature index")
plt.yticks([])  # a one-row image has no meaningful y axis

# Optional: Visualize the feature importances.
# Open a separate figure so the bars are not drawn on top of the mask image.
# `model` was trained on the selected columns only, so bar positions index
# the selected features, not the original columns.
plt.figure()
importances = model.feature_importances_
plt.bar(range(len(importances)), importances)
plt.title("Feature Importances from Random Forest")
plt.xlabel("Feature index")
plt.ylabel("Importance score")
plt.show()

# Score a logistic regression on the same reduced feature set.
# NOTE(review): default solver settings may emit a ConvergenceWarning on
# unscaled data -- confirm whether scaling or a higher max_iter is wanted.
score = LogisticRegression().fit(X_train_l1, y_train).score(X_test_l1, y_test)
print("Test score: {:.3f}".format(score))
3. Iterative selection:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split

# Iterative feature selection demo: recursive feature elimination (RFE)
# driven by a random forest, followed by a plot of the surviving features.

cancer = load_breast_cancer()

# Half of the samples go to training, half to testing.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0, test_size=0.5
)

# Seeded forest used by RFE as the ranking estimator.
forest = RandomForestClassifier(n_estimators=100, random_state=42)

# Ask RFE to keep 10 features, then fit it on the training split.
select = RFE(forest, n_features_to_select=10)
select.fit(X_train, y_train)

# Boolean mask over the input features: True where the feature was kept.
mask = select.get_support()

# Draw the mask as a single row -- black is True (selected), white is False.
plt.matshow(mask.reshape(1, -1), cmap='gray_r')
plt.xlabel("Feature index")
plt.title("Selected Features from RFE")
plt.yticks([])  # a one-row image has no meaningful y axis
plt.show()

Satellite Communications
No ratings yet
Satellite Communications
192 pages
DSO Organizational Chart - by Michael W. Davis, DDS
No ratings yet
DSO Organizational Chart - by Michael W. Davis, DDS
1 page
Untitled Document
No ratings yet
Untitled Document
6 pages
Recsify Technologies Assignment
No ratings yet
Recsify Technologies Assignment
10 pages
AAM 6th Prac
No ratings yet
AAM 6th Prac
3 pages
Experiment 11
No ratings yet
Experiment 11
3 pages
Lab 4 - Feature Selection - Appendix
No ratings yet
Lab 4 - Feature Selection - Appendix
3 pages
DWDM Lab 3
No ratings yet
DWDM Lab 3
10 pages
1
No ratings yet
1
13 pages
23BCE7092 ML Lab Assignment
No ratings yet
23BCE7092 ML Lab Assignment
14 pages
Warpper Method
No ratings yet
Warpper Method
8 pages
MlLabManualdocx 2024 09 04 22 02 58
No ratings yet
MlLabManualdocx 2024 09 04 22 02 58
19 pages
MLfull
No ratings yet
MLfull
29 pages
SUMMARY
No ratings yet
SUMMARY
16 pages
Setup: This Notebook Contains All The Sample Code and Solutions To The Exercises in Chapter 7
No ratings yet
Setup: This Notebook Contains All The Sample Code and Solutions To The Exercises in Chapter 7
23 pages
Da Lab Mannual
No ratings yet
Da Lab Mannual
25 pages
ML
No ratings yet
ML
11 pages
20BCP021 Assignment 6
No ratings yet
20BCP021 Assignment 6
15 pages
C2 W4 Lab 02 Tree Ensemble
No ratings yet
C2 W4 Lab 02 Tree Ensemble
16 pages
Random Forest 1737667979
No ratings yet
Random Forest 1737667979
11 pages
Import Numpy As NP Import Pandas As PD
No ratings yet
Import Numpy As NP Import Pandas As PD
7 pages
ML 1
No ratings yet
ML 1
11 pages
Modelling and Simulation Sample Model 4
No ratings yet
Modelling and Simulation Sample Model 4
3 pages
Random Forest
No ratings yet
Random Forest
3 pages
ML Fat
No ratings yet
ML Fat
9 pages
Bacdeaf 23032025 115708 Split 1
No ratings yet
Bacdeaf 23032025 115708 Split 1
37 pages
Bagging - Ipynb - Colab
No ratings yet
Bagging - Ipynb - Colab
2 pages
Iii Aid - ML
No ratings yet
Iii Aid - ML
30 pages
AI ML - Cycle 2 Programs
No ratings yet
AI ML - Cycle 2 Programs
15 pages
Final ML Programs 075005
No ratings yet
Final ML Programs 075005
15 pages
AML Lab
No ratings yet
AML Lab
14 pages
Heart: Our "Goal" Predict The Presence of Heart Disease in The Patient
100% (1)
Heart: Our "Goal" Predict The Presence of Heart Disease in The Patient
73 pages
Experiment 8 ML Vtu
No ratings yet
Experiment 8 ML Vtu
4 pages
Assignment 10
No ratings yet
Assignment 10
14 pages
5) Randomforest - Ipynb - Colaboratory
No ratings yet
5) Randomforest - Ipynb - Colaboratory
12 pages
Preductive Modelling Assignment
No ratings yet
Preductive Modelling Assignment
3 pages
Python Implementation of Random Forest Algorithm
No ratings yet
Python Implementation of Random Forest Algorithm
10 pages
This Study Resource Was
No ratings yet
This Study Resource Was
5 pages
C2 W4 Lab 02 Tree Ensemble
No ratings yet
C2 W4 Lab 02 Tree Ensemble
10 pages
Scikit Learn What Were Covering
No ratings yet
Scikit Learn What Were Covering
15 pages
Import Pandas As PD DF PD - Read - CSV ("Titanic - Train - CSV") DF - Head
No ratings yet
Import Pandas As PD DF PD - Read - CSV ("Titanic - Train - CSV") DF - Head
20 pages
Machine
100% (1)
Machine
45 pages
ML5 Implementation
No ratings yet
ML5 Implementation
32 pages
PYHTONPRACT
No ratings yet
PYHTONPRACT
4 pages
Aiml Ex 4-7
No ratings yet
Aiml Ex 4-7
8 pages
FREE AI Code Generator - Generate Code Online in Any Language
No ratings yet
FREE AI Code Generator - Generate Code Online in Any Language
12 pages
Ashwin Report
No ratings yet
Ashwin Report
18 pages
Reast Cancer Prediction Using Debt
No ratings yet
Reast Cancer Prediction Using Debt
18 pages
Linearregression SVM
No ratings yet
Linearregression SVM
3 pages
X A Iiiiii Iiiiii
No ratings yet
X A Iiiiii Iiiiii
2 pages
Code and Output of Cancer Detection Model
No ratings yet
Code and Output of Cancer Detection Model
13 pages
Experiment 8
No ratings yet
Experiment 8
4 pages
Build A Random Forest Algorithm Aim
No ratings yet
Build A Random Forest Algorithm Aim
3 pages
Random Forest: The Algorithm in A Nutshell
No ratings yet
Random Forest: The Algorithm in A Nutshell
10 pages
ML Lab Experiment Shortened With Same Output
No ratings yet
ML Lab Experiment Shortened With Same Output
6 pages
ML File
No ratings yet
ML File
17 pages
Q3-Copy1: Pandas PD Numpy NP CSV
No ratings yet
Q3-Copy1: Pandas PD Numpy NP CSV
7 pages
10 Random - Forest - Algo
No ratings yet
10 Random - Forest - Algo
6 pages
FDP Session 4 (Decision Tree)
No ratings yet
FDP Session 4 (Decision Tree)
1 page
ML Minimized Programs
No ratings yet
ML Minimized Programs
9 pages
MCS-011: Problem Solving and Programming
From Everand
MCS-011: Problem Solving and Programming
Dr. DK Sukhani
No ratings yet
Random Sample Consensus: Robust Estimation in Computer Vision
From Everand
Random Sample Consensus: Robust Estimation in Computer Vision
Fouad Sabry
No ratings yet
CE UNIT-1 MATERIAL PDF
No ratings yet
CE UNIT-1 MATERIAL PDF
17 pages
Rajuvlsi1 Updated
No ratings yet
Rajuvlsi1 Updated
26 pages
BSNL Report
No ratings yet
BSNL Report
26 pages
CNC Nots 802d or 840d DX150
No ratings yet
CNC Nots 802d or 840d DX150
75 pages
Productores de Banano de Nicaragua Probanic Datos Climáticos de Estación Finca San Luis Enero, 2011
No ratings yet
Productores de Banano de Nicaragua Probanic Datos Climáticos de Estación Finca San Luis Enero, 2011
17 pages
Chapter 3 Data Modeling Using The Entity Relationship ER Model
No ratings yet
Chapter 3 Data Modeling Using The Entity Relationship ER Model
55 pages
Supermarket
No ratings yet
Supermarket
4 pages
CTPAT Job Aid - Personnel Training Checklist Sample - October 2021
No ratings yet
CTPAT Job Aid - Personnel Training Checklist Sample - October 2021
4 pages
Obciążenie Oblodzeniem
No ratings yet
Obciążenie Oblodzeniem
14 pages
Littering Stats Essay
No ratings yet
Littering Stats Essay
3 pages
Water Ingress Analysis and Splash Protection Evaluation For Vehicle Wading Using Non-Classical CFD Simulation
No ratings yet
Water Ingress Analysis and Splash Protection Evaluation For Vehicle Wading Using Non-Classical CFD Simulation
13 pages
Niact 2
No ratings yet
Niact 2
25 pages
Simple and Compound Entry
100% (1)
Simple and Compound Entry
4 pages
Hazid Record
No ratings yet
Hazid Record
21 pages
Inputs and Outputs List Page:1/21: Example-9: Sequential Control of Induction Motors
No ratings yet
Inputs and Outputs List Page:1/21: Example-9: Sequential Control of Induction Motors
7 pages
En (1070)
100% (1)
En (1070)
1 page
Scedule of Defense
No ratings yet
Scedule of Defense
1 page
Worksheet and Coronavirus 10 Ac
No ratings yet
Worksheet and Coronavirus 10 Ac
5 pages
ELE 4623: Control Systems: Faculty of Engineering Technology
No ratings yet
ELE 4623: Control Systems: Faculty of Engineering Technology
15 pages
Fallout 4 Bobblehead and Magazine Guide - Zone 1
No ratings yet
Fallout 4 Bobblehead and Magazine Guide - Zone 1
1 page
Pro Proctor User Guide
No ratings yet
Pro Proctor User Guide
24 pages
PDF Living On A Prayer - English Version
No ratings yet
PDF Living On A Prayer - English Version
17 pages
R S Aggarwal Solution Class 11 Maths Chapter 31 Probability Exercise 31A
No ratings yet
R S Aggarwal Solution Class 11 Maths Chapter 31 Probability Exercise 31A
9 pages
Dot Matrix Printer (DMP)
No ratings yet
Dot Matrix Printer (DMP)
12 pages
SOLUTION, SUSPENSION and COLLOID Activity Sheet
67% (3)
SOLUTION, SUSPENSION and COLLOID Activity Sheet
1 page
AquaLab 4 Water Activity Meter Manual
No ratings yet
AquaLab 4 Water Activity Meter Manual
129 pages
Clutch System
No ratings yet
Clutch System
14 pages
Gulfood Exhibitor List N 1
No ratings yet
Gulfood Exhibitor List N 1
19 pages
DE-13 - Quiz 8
No ratings yet
DE-13 - Quiz 8
2 pages
Applied Sciences: Fficiency Analysis of Manufacturing Line With
No ratings yet
Applied Sciences: Fficiency Analysis of Manufacturing Line With
15 pages
Mechanical Engineering Seminars
No ratings yet
Mechanical Engineering Seminars
1 page
Air Brake Rake Testing Procedure (LHB Coaches (2) - 0
No ratings yet
Air Brake Rake Testing Procedure (LHB Coaches (2) - 0
22 pages

Automatic Feature Selection

Uploaded by

Automatic Feature Selection

Uploaded by

Automatic Feature Selection

# Load the breast cancer dataset

# Create a deterministic random number generator

# Generate noise features

# Add noise features to the dataset

# Split the dataset into training and test sets

# Use SelectPercentile to select the top 50% of features

# Transform the training set to keep only the selected features

# Print the shapes of the original and selected training datasets

import matplotlib.pyplot as plt

# Load the breast cancer dataset

# Split the dataset into training and test sets

# Initialize SelectFromModel with RandomForestClassifier

# Fit the selector to the training data

# Transform the training and test sets

# Fit a RandomForest model on the selected features

# Print the accuracy on the test set with selected features

# Optional: Visualize the feature importances

# Load the breast cancer dataset

# Split the dataset into training and test sets

# Initialize RFE with RandomForestClassifier

# Fit the selector to the training data

# Get the mask of selected features

# Visualize the selected features

You might also like