0% found this document useful (0 votes)

25 views5 pages

02 B Regression Healthcare

Uploaded by

rayachotiusa

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

25 views5 pages

02 B Regression Healthcare

Uploaded by

rayachotiusa

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 5

Healthcare Data

-----------------------------------------------------------------------------------
--------------------------------------------------
# Importing the libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Ignore harmless warnings

import warnings
warnings.filterwarnings("ignore")

# Set to display all the columns in dataset

pd.set_option("display.max_columns", None)

# Import psql to run queries

import pandasql as psql

-----------------------------------------------------------------------------------
--------------------------------------------------
# load the Health Insurance dataset

# Read the CSV into a DataFrame; header=0 takes the column names from the first row.
# The path is kept as one raw-string literal: a raw string cannot span lines,
# and a line break inside it would leave the literal unterminated.
HealthIns = pd.read_csv(r"E:\R3SPAnalytics\00 IIT KGP Hyd\00-LATA\Datasets\Health_Ins_Expenses.csv", header=0)

# Copy to back-up file

HealthIns_BK = HealthIns.copy()

# Display the first 5 records

HealthIns.head()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display the dataset information

HealthIns.info()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Identify the duplicate records within the dataset

HealthIns_dup = HealthIns[HealthIns.duplicated(keep='last')]

# Display the duplicate records

HealthIns_dup
-----------------------------------------------------------------------------------
--------------------------------------------------
# Remove the identified duplicate records

HealthIns = HealthIns.drop_duplicates()

# Display the shape of the dataset

HealthIns.shape
-----------------------------------------------------------------------------------
--------------------------------------------------
# Re-setting the row index

HealthIns = HealthIns.reset_index(drop=True)

# Copy file to back-up file after deletion of duplicate records

HealthIns_BK2 = HealthIns.copy()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display the dataset information after deletion of duplicates

HealthIns.info()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display the number of unique values of each variable

HealthIns.nunique()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display the missing values information of variables

HealthIns.isnull().sum()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display 'Gender' categorical variable

HealthIns['Gender'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Replace 'Gender' variable and convert to integer value.

# Encode 'Gender' through an explicit lookup instead of chained substring
# replacement: 'male' is a substring of 'female', so the replace-based version
# only worked because 'female' happened to be rewritten first.
# Any value missing from the lookup becomes NaN, and astype(int) then raises,
# so unexpected categories cannot slip through silently.
gender_codes = {'female': 0, 'male': 1}
HealthIns['Gender'] = HealthIns['Gender'].map(gender_codes).astype(int)
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display 'Smoker' categorical variable

HealthIns['Smoker'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Replace 'Smoker' variable and convert as a integer value.

# Encode 'Smoker' through an explicit lookup. Substring replacement of 'no'
# would also rewrite the letters "no" inside any longer value, whereas map()
# matches whole values only.
# Any value missing from the lookup becomes NaN, and astype(int) then raises.
smoker_codes = {'no': 0, 'yes': 1}
HealthIns['Smoker'] = HealthIns['Smoker'].map(smoker_codes).astype(int)
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display 'Region' categorical variable

HealthIns['Region'].value_counts()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Replace 'Region' variable and convert to integer value.

# Encode 'Region' through an explicit lookup (whole-value match) instead of
# four chained substring replacements.
# Any value missing from the lookup becomes NaN, and astype(int) then raises.
# NOTE(review): the codes 0-3 impose an order on what looks like a nominal
# category; consider one-hot encoding if that order is not intended.
region_codes = {'northeast': 0, 'northwest': 1, 'southeast': 2, 'southwest': 3}
HealthIns['Region'] = HealthIns['Region'].map(region_codes).astype(int)
-----------------------------------------------------------------------------------
--------------------------------------------------
# Remove the 'Record_ID' column from the dataset

del HealthIns['Record_ID']

# Display first 5 records

HealthIns.head()
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display all the column variables

HealthIns.columns
-----------------------------------------------------------------------------------
--------------------------------------------------
# Identify the variables for scaling (MinMaxScaler)

cols = ['Age', 'BMI', 'Children','Region']

-----------------------------------------------------------------------------------
--------------------------------------------------
# Identify the independent and Target (dependent) variables

# The target column is named once; every other column is an independent variable.
TargetVar = 'Expenses'
IndepVar = [col for col in HealthIns.columns if col != TargetVar]

# x holds the predictor columns, y the target column.
x = HealthIns[IndepVar]
y = HealthIns[TargetVar]
-----------------------------------------------------------------------------------
--------------------------------------------------
# Split the data into train and test (random sampling)

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,

random_state=42)

# Display the shape for train & test data

x_train.shape, x_test.shape, y_train.shape, y_test.shape

-----------------------------------------------------------------------------------
--------------------------------------------------
# Scaling the features by using MinMaxScaler

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

# Work on explicit copies so the column assignments below modify only the
# train/test frames themselves.
x_train = x_train.copy()
x_test = x_test.copy()

# Learn the per-column min/max from the training data only.
x_train[cols] = mmscaler.fit_transform(x_train[cols])

# Apply the training min/max to the test data. Re-fitting the scaler here
# (fit_transform) would scale the test set with its own min/max, so the same
# raw value would map to different scaled values in train and test.
x_test[cols] = mmscaler.transform(x_test[cols])
-----------------------------------------------------------------------------------
--------------------------------------------------
Multiple Regression Algorithm
-----------------------------------------------------------------------------------
--------------------------------------------------
# Train the algorithm and build the model with train dataset

from sklearn.linear_model import LinearRegression

# Create an object for regression model

ModelRGR = LinearRegression()

# Train the model with training dataset

ModelRGR.fit(x_train, y_train)

# Predict the model with test dataset

y_pred = ModelRGR.predict(x_test)

# Evaluation metrics for Regression model

from sklearn import metrics

# Compute each metric once, then report it.
mae = metrics.mean_absolute_error(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = metrics.r2_score(y_test, y_pred)
mape_pct = metrics.mean_absolute_percentage_error(y_test, y_pred) * 100

# RMSLE is sqrt(mean((log(1 + pred) - log(1 + actual))^2)); it is not log(RMSE).
# A linear model can predict negative values, for which log1p is undefined or
# NaN, so predictions are floored at 0 before taking the log.
# NOTE(review): assumes the actual 'Expenses' values are non-negative - confirm.
actual_log = np.log1p(np.asarray(y_test, dtype=float))
pred_log = np.log1p(np.clip(np.asarray(y_pred, dtype=float), 0, None))
rmsle = np.sqrt(np.mean((pred_log - actual_log) ** 2))

print('Mean Absolute Error (MAE):', round(mae, 3))
print('Mean Squared Error (MSE):', round(mse, 3))
print('Root Mean Squared Error (RMSE):', round(rmse, 3))
print('R2_score:', round(r2, 6))
print('Mean Absolute Percentage Error (MAPE):', round(mape_pct, 3), '%')
print('Root Mean Squared Log Error (RMSLE):', round(rmsle, 3))

# Define the function to calculate the MAPE - Mean Absolute Percentage Error

def MAPE(y_test, y_pred):
    """Return the Mean Absolute Percentage Error, in percent.

    Parameters
    ----------
    y_test : array-like
        Actual values. These are the denominator of each term, so a zero
        here produces an infinite or NaN term.
    y_pred : array-like
        Predicted values, the same length as ``y_test``.

    Returns
    -------
    float
        mean(|(actual - predicted) / actual|) * 100
    """
    # Convert to arrays so the arithmetic is element-wise and positional,
    # regardless of any pandas index on the inputs.
    y_test, y_pred = np.array(y_test), np.array(y_pred)
    return np.mean(np.abs((y_test - y_pred) / y_test)) * 100

# Evaluation of MAPE

result = MAPE(y_test, y_pred)

print('Mean Absolute Percentage Error (MAPE):', round(result, 3), '%')

# Calculate Adjusted R squared values

# R squared is measured on the test set, so the adjustment uses the test-set
# sample size and feature count, not those of the full dataset.
r2_raw = metrics.r2_score(y_test, y_pred)
r_squared = round(r2_raw, 6)

n_obs = len(y_test)
n_features = x_test.shape[1]

# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1).
# The unrounded R^2 is used so rounding happens only once, on the final value.
adjusted_r_squared = round(1 - (1 - r2_raw) * (n_obs - 1) / (n_obs - n_features - 1), 6)
print('Adj R Square: ', adjusted_r_squared)
-----------------------------------------------------------------------------------
--------------------------------------------------
# Display the Final results

Results = pd.DataFrame({'Expenses_A':y_test, 'Expenses_P':y_pred})

# Merge two Dataframes on index of both the dataframes

ResultsFinal = HealthIns_BK2.merge(Results, left_index=True, right_index=True)

# Display 5 records randomly

ResultsFinal.sample(5)
-----------------------------------------------------------------------------------
--------------------------------------------------
# Calculate the %of Error

ResultsFinal['%Error'] = round(((ResultsFinal['Expenses_A']-
ResultsFinal['Expenses_P'])/ResultsFinal['Expenses_A'])*100,3)

# Display the random 5 records

ResultsFinal.sample(5)
-----------------------------------------------------------------------------------
--------------------------------------------------

02 B Regression Healthcare
No ratings yet
02 B Regression Healthcare
5 pages
SML Lab 1
No ratings yet
SML Lab 1
19 pages
Linear and Multilinear Regression
No ratings yet
Linear and Multilinear Regression
5 pages
Batch-2 Ieee DMT
No ratings yet
Batch-2 Ieee DMT
4 pages
Data Science Record - 05
No ratings yet
Data Science Record - 05
20 pages
Stroke Prediction
No ratings yet
Stroke Prediction
10 pages
Heart Disease Diagnosis Using Machine Learning
No ratings yet
Heart Disease Diagnosis Using Machine Learning
26 pages
ML Manual Final
No ratings yet
ML Manual Final
35 pages
Project Paarth
No ratings yet
Project Paarth
21 pages
ML Lab Codes
No ratings yet
ML Lab Codes
14 pages
Healthcare Insurance Prediction Main
No ratings yet
Healthcare Insurance Prediction Main
74 pages
Cardio Screen RF
100% (1)
Cardio Screen RF
27 pages
ML LAB Manual-1
No ratings yet
ML LAB Manual-1
33 pages
ML Complete Notes Hridoy
No ratings yet
ML Complete Notes Hridoy
5 pages
B58 - Handling Missing Values, Feature - Selection
No ratings yet
B58 - Handling Missing Values, Feature - Selection
4 pages
New Text Document
No ratings yet
New Text Document
7 pages
Group Work Assignment Supervised and Unsupervised Learning
No ratings yet
Group Work Assignment Supervised and Unsupervised Learning
10 pages
Gaurav - Data Mining Lab Assignment
No ratings yet
Gaurav - Data Mining Lab Assignment
36 pages
Sla4a 21im30005
No ratings yet
Sla4a 21im30005
11 pages
'Name-Piyush Tiwari''/n' 'Section - C'/N' 'Roll - No-2001610100142'
No ratings yet
'Name-Piyush Tiwari''/n' 'Section - C'/N' 'Roll - No-2001610100142'
28 pages
Profitanalysis
No ratings yet
Profitanalysis
18 pages
Health Insurance Lead Prediction
No ratings yet
Health Insurance Lead Prediction
21 pages
DA Lab
No ratings yet
DA Lab
27 pages
KNN - Jupyter Notebook
No ratings yet
KNN - Jupyter Notebook
5 pages
Python 1
No ratings yet
Python 1
3 pages
Data Analysis in Python-3
No ratings yet
Data Analysis in Python-3
4 pages
Assignment 03
No ratings yet
Assignment 03
6 pages
Python Cod1
No ratings yet
Python Cod1
3 pages
Import Pandas As PD
No ratings yet
Import Pandas As PD
3 pages
Logistic Regression
No ratings yet
Logistic Regression
12 pages
Data Science Fundamentals
No ratings yet
Data Science Fundamentals
22 pages
DA Programs
No ratings yet
DA Programs
44 pages
ML 1-11
No ratings yet
ML 1-11
27 pages
Ash Regression
No ratings yet
Ash Regression
11 pages
DSBDA Prac4 2
No ratings yet
DSBDA Prac4 2
1 page
Ass 1 Dsbda
No ratings yet
Ass 1 Dsbda
8 pages
Diabetic Prediction Using LogicalRegression
No ratings yet
Diabetic Prediction Using LogicalRegression
9 pages
ML Proj Diabetes
No ratings yet
ML Proj Diabetes
51 pages
Medical
No ratings yet
Medical
4 pages
Subset Selection Class Assignment
No ratings yet
Subset Selection Class Assignment
5 pages
FYMCA IDSLab A6 Submission
No ratings yet
FYMCA IDSLab A6 Submission
9 pages
Data Pre-Processing
No ratings yet
Data Pre-Processing
22 pages
DataAnalytics Lab Manual
No ratings yet
DataAnalytics Lab Manual
35 pages
Program
No ratings yet
Program
10 pages
Prg7a - Jupyter Notebook
No ratings yet
Prg7a - Jupyter Notebook
12 pages
Da Lab Mannual
No ratings yet
Da Lab Mannual
25 pages
Week1 Code Corrected
No ratings yet
Week1 Code Corrected
2 pages
Data Analytics Lab Manual
No ratings yet
Data Analytics Lab Manual
26 pages
Logistic Regression 205
No ratings yet
Logistic Regression 205
8 pages
Aiml Programs
No ratings yet
Aiml Programs
12 pages
Machine Learning Basics 1683717543
No ratings yet
Machine Learning Basics 1683717543
15 pages
DSBDA Practicals
No ratings yet
DSBDA Practicals
16 pages
KNN For Classification
No ratings yet
KNN For Classification
5 pages
Coloring Fruits
No ratings yet
Coloring Fruits
15 pages
DSBDA4
No ratings yet
DSBDA4
6 pages
Assignment AI-ML
No ratings yet
Assignment AI-ML
13 pages
Lab Manual - MachineLearningLaboratory-DR - Vaishnavi
No ratings yet
Lab Manual - MachineLearningLaboratory-DR - Vaishnavi
71 pages
Sanket ML Assign1
No ratings yet
Sanket ML Assign1
9 pages
Vedant, Aiml
No ratings yet
Vedant, Aiml
63 pages
Ensemble Methods
No ratings yet
Ensemble Methods
32 pages
Citsit
No ratings yet
Citsit
16 pages
Jurnal Kualitas Produk Terhadap Loyalitas Pelanggan
No ratings yet
Jurnal Kualitas Produk Terhadap Loyalitas Pelanggan
7 pages
Stat 151 Formulas
100% (1)
Stat 151 Formulas
3 pages
Monte Carlo Simulation - Central Limit Theorem
No ratings yet
Monte Carlo Simulation - Central Limit Theorem
5 pages
Statistics Solved MCQs (Set-1)
No ratings yet
Statistics Solved MCQs (Set-1)
8 pages
ML - Question Bank
No ratings yet
ML - Question Bank
4 pages
Lesson 2.1 - Understanding Normal Curve
No ratings yet
Lesson 2.1 - Understanding Normal Curve
16 pages
Sadat Chi Square Testing For A Discrete Probability Distribution Start
No ratings yet
Sadat Chi Square Testing For A Discrete Probability Distribution Start
12 pages
11 Regression JASP
100% (1)
11 Regression JASP
35 pages
A Regression Analysis Investigating The Relationship Between Income and Happiness
No ratings yet
A Regression Analysis Investigating The Relationship Between Income and Happiness
7 pages
Statistics and Probability Reviewer
No ratings yet
Statistics and Probability Reviewer
6 pages
Boys
No ratings yet
Boys
3 pages
Assignment .2. STA301 Rimsha Hameed
No ratings yet
Assignment .2. STA301 Rimsha Hameed
5 pages
Akaike's Information Criterion For Estimated Model - MATLAB Aic
No ratings yet
Akaike's Information Criterion For Estimated Model - MATLAB Aic
5 pages
Examination - SPC Total Allowed Time:1.5 Hours
No ratings yet
Examination - SPC Total Allowed Time:1.5 Hours
3 pages
Descriptive Statistics: Innomatics Research Lab
No ratings yet
Descriptive Statistics: Innomatics Research Lab
78 pages
4.2 Correlation Regression TQ
No ratings yet
4.2 Correlation Regression TQ
9 pages
ISDS 361A - Chapter 1 PDF
No ratings yet
ISDS 361A - Chapter 1 PDF
23 pages
MGT555 - Group 5 Assignment Report
No ratings yet
MGT555 - Group 5 Assignment Report
22 pages
Fin
No ratings yet
Fin
2 pages
Formal Experimental Research Design
80% (5)
Formal Experimental Research Design
13 pages
Quantitative Research Design
No ratings yet
Quantitative Research Design
4 pages
Logistic Regration 17 Sep-24
No ratings yet
Logistic Regration 17 Sep-24
37 pages
Forecasting Forecasting Forecasting Forecasting: Production Planning & Control Production Planning & Control
No ratings yet
Forecasting Forecasting Forecasting Forecasting: Production Planning & Control Production Planning & Control
80 pages
OBE Syllabus Biostatistics
No ratings yet
OBE Syllabus Biostatistics
8 pages
The Population Is The Set of Entities Under Study
No ratings yet
The Population Is The Set of Entities Under Study
2 pages
ST4250 23S1 Assignment 2
No ratings yet
ST4250 23S1 Assignment 2
2 pages
DP2 AI SL Formative 3
No ratings yet
DP2 AI SL Formative 3
3 pages
Chapter No. 03 Experiments With A Single Factor - The Analysis of Variance (Presentation)
No ratings yet
Chapter No. 03 Experiments With A Single Factor - The Analysis of Variance (Presentation)
81 pages

02 B Regression Healthcare

Uploaded by

02 B Regression Healthcare

Uploaded by

Healthcare Data

# Ignore harmless warnings

# Set to display all the columns in dataset

# Import psql to run queries

import pandasql as psql

HealthIns = pd.read_csv(r"E:\R3SPAnalytics\00 IIT KGP Hyd\00-LATA\Datasets\

# Copy to back-up file

# Display the first 5 records

# Display the duplicate records

# Display the shape of the dataset

# Copy file to back-up file after deletion of duplicate records

HealthIns['Gender'] = HealthIns['Gender'].str.replace('female', '0')

HealthIns['Smoker'] = HealthIns['Smoker'].str.replace('no', '0')

HealthIns['Region'] = HealthIns['Region'].str.replace('northeast', '0')

# Display first 5 records

cols = ['Age', 'BMI', 'Children','Region']

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,

# Display the shape for train & test data

x_train.shape, x_test.shape, y_train.shape, y_test.shape

from sklearn.preprocessing import MinMaxScaler

mmscaler = MinMaxScaler(feature_range=(0, 1))

from sklearn.linear_model import LinearRegression

# Create an object for regression model

# Train the model with training dataset

# Predict the model with test dataset

# Evaluation metrics for Regression model

from sklearn import metrics

print('Mean Absolute Error (MAE):', round(metrics.mean_absolute_error(y_test,

def MAPE(y_test, y_pred):

result = MAPE(y_test, y_pred)

# Calculate Adjusted R squared values

r_squared = round(metrics.r2_score(y_test, y_pred),6)

Results = pd.DataFrame({'Expenses_A':y_test, 'Expenses_P':y_pred})

# Merge two Dataframes on index of both the dataframes

ResultsFinal = HealthIns_BK2.merge(Results, left_index=True, right_index=True)

# Display 10 records randomly

# Display the random 5 records

You might also like