Code

This document outlines a machine learning workflow using LightGBM for binary classification: installing the required libraries, loading an already-preprocessed dataset, scaling the features, and optimizing hyperparameters with Optuna. Each trial trains a model against an objective function scored by AUC on the held-out test set. The final model is retrained with the optimized parameters, and predictions use a lowered probability threshold to improve sensitivity.

# 1. Installation and Imports (Same as before)

!pip install pandas scikit-learn imbalanced-learn lightgbm optuna

import pandas as pd

import numpy as np

from sklearn.model_selection import train_test_split

#from sklearn.experimental import enable_iterative_imputer  # Commented out

#from sklearn.impute import IterativeImputer  # Commented out

#from imblearn.over_sampling import BorderlineSMOTE  # Commented out

from sklearn.metrics import classification_report, accuracy_score, roc_auc_score  # Using AUC rather than accuracy alone

import lightgbm as lgb

import optuna

from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures  # Added scalers

from sklearn.linear_model import LogisticRegression  # Just one more model

from sklearn.ensemble import RandomForestClassifier

# 2. Load the Preprocessed Dataset

data = pd.read_csv("processed_df.csv")  # <------ REPLACE THIS WITH YOUR PREPROCESSED DATASET PATH! AND SPECIFY SEPARATOR
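# If your preprocessed CSV is not comma-separated (cardiovascular-disease
# datasets are often distributed semicolon-separated, for example), pass
# sep explicitly; this line is an illustration, not part of the script:
# data = pd.read_csv("processed_df.csv", sep=";")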

# 3. Define Target and Features

X = data.drop('cardio', axis=1)

y = data['cardio']
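# Oversampling is omitted below, so it is worth confirming the class
# balance before leaning on accuracy as a metric:
print(y.value_counts(normalize=True))  # fraction of each class in 'cardio'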

# 4. Train/Test Split (Same as before)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
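# Optional tweak, not in the original script: a stratified split keeps the
# class ratio identical in train and test, which matters if 'cardio' is
# imbalanced:
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42, stratify=y)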
# 5. Scaling the data

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)

X_test_scaled = scaler.transform(X_test)
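# Note: StandardScaler returns plain numpy arrays, so column names are lost
# and LightGBM will warn about missing feature names. One optional way to
# keep them (an addition, not part of the original script):
# X_train_scaled = pd.DataFrame(X_train_scaled, columns=X.columns, index=X_train.index)
# X_test_scaled = pd.DataFrame(X_test_scaled, columns=X.columns, index=X_test.index)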

# 6. Imputation (OMITTED) (No imputation in this version - be sure to handle missing values beforehand)

#mice_imputer = IterativeImputer(random_state=0)

#X_train_imputed = mice_imputer.fit_transform(X_train_scaled)  # Use the scaled version.

#X_test_imputed = mice_imputer.transform(X_test_scaled)  # Use the scaled version.

X_train_imputed = X_train_scaled  # Alias the scaled values so the later variable names still work

X_test_imputed = X_test_scaled  # Same aliasing for the test set
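# Because imputation is skipped, a quick sanity check that the preprocessed
# file really has no missing values can save a confusing failure later
# (a defensive addition, not in the original script):
assert not data.isnull().values.any(), "processed_df.csv still contains NaNs"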

# 7. Oversampling (OMITTED) (No BorderlineSMOTE)

#smote = BorderlineSMOTE(random_state=0)

#X_train_resampled, y_train_resampled = smote.fit_resample(X_train_imputed, y_train)  # SMOTE on SCALED and IMPUTED data

X_train_resampled = X_train_imputed  # Alias the imputed values so the later variable names still work

y_train_resampled = y_train  # Same aliasing for the labels
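# If you later want to restore oversampling, a minimal sketch based on the
# commented lines above (assuming imbalanced-learn is installed, as in the
# pip line at the top):
# from imblearn.over_sampling import BorderlineSMOTE
# smote = BorderlineSMOTE(random_state=0)
# X_train_resampled, y_train_resampled = smote.fit_resample(X_train_imputed, y_train)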

# 8. Define the Objective Function for Hyperparameter Optimization (Slight changes to metrics)

def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'auc',  # Or try 'binary_logloss', 'binary_error'
        'boosting_type': 'gbdt',  # Experiment with 'dart' and 'goss'
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 0.1, log=True),  # suggest_loguniform is deprecated
        'num_leaves': trial.suggest_int('num_leaves', 31, 200),
        'max_depth': trial.suggest_int('max_depth', -1, 25),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 200),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 5.0),  # lambda_l1 is now called reg_alpha
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 5.0),  # lambda_l2 is now called reg_lambda
        'min_split_gain': trial.suggest_float('min_split_gain', 0.0, 1.0),
        'max_bin': trial.suggest_int('max_bin', 100, 255),
    }

    dtrain = lgb.Dataset(X_train_resampled, label=y_train_resampled)
    # Early stopping needs a validation set other than the training data;
    # the test set is used here, matching how each trial is scored below.
    dvalid = lgb.Dataset(X_test_imputed, label=y_test, reference=dtrain)

    # Train the model with early stopping to prevent overfitting
    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dvalid],
        callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)],  # verbose=False silences the callback
    )

    y_pred = model.predict(X_test_imputed)  # Predict against the SCALED test data.
    return roc_auc_score(y_test, y_pred)  # Optimize based on AUC

# 9. Optimize Hyperparameters (Same as before)

study = optuna.create_study(direction='maximize')

study.optimize(objective, n_trials=100, show_progress_bar=False)  # Progress bar disabled for readability
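# Optuna keeps the best score alongside the best parameters, which is handy
# as a quick check before retraining:
print("Best trial AUC:", study.best_value)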

# 10. Train the Final Model (Slight Changes)

best_params = study.best_params

final_model = lgb.LGBMClassifier(**best_params)

final_model.fit(X_train_resampled, y_train_resampled)

# 11. Predictions and Evaluation (Use AUC and a more granular probability threshold.)

y_pred_proba = final_model.predict_proba(X_test_imputed)[:, 1]  # Get probabilities for the positive class

y_pred = (y_pred_proba > 0.4).astype(int)  # Lower the threshold from 0.5 to 0.4 to try to get better sensitivity.

print("Best Parameters:", best_params)

print("Accuracy:", accuracy_score(y_test, y_pred))

print("AUC: ", roc_auc_score(y_test, y_pred_proba))


print(classification_report(y_test, y_pred))
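# The 0.4 cutoff above is a single guess; a small sweep makes the
# sensitivity/precision trade-off explicit. A minimal sketch, not part of
# the original script:
from sklearn.metrics import recall_score, precision_score

for t in [0.3, 0.4, 0.5, 0.6]:
    preds = (y_pred_proba > t).astype(int)
    print(f"threshold={t:.1f}  recall={recall_score(y_test, preds):.3f}  "
          f"precision={precision_score(y_test, preds):.3f}")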
