0% found this document useful (0 votes)

11 views4 pages

Data Exploration and Regression in Python With HBAT Dataset

Uploaded by

mani

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

11 views4 pages

Data Exploration and Regression in Python With HBAT Dataset

Uploaded by

mani

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 4

#importing the libraries

import pandas as pd
import numpy as np
import statsmodels.api as sm
# Load the HBAT data set and the table of variable names.
# Each path is a single string literal: the original text had the literals
# broken across lines, which is a syntax error.
# NOTE(review): paths were rejoined from wrapped text ("Data Files",
# "DARP2024-25") -- confirm they match the folder names on disk.
data = pd.read_csv(
    'C:/Files/My Courses/MBA/Amrita/Coimbatore/DARP/DARP2024-25/Data Files/HBAT.csv',
    sep=',',
)
# Bound to its own name only. The original chained assignment
# (`var_names=data = ...`) also rebound `data`, replacing the HBAT data
# with the variable-names table.
var_names = pd.read_csv(
    'C:/Files/My Courses/MBA/Amrita/Coimbatore/DARP/DARP2024-25/Data Files/variablenames.csv',
    sep=',',
)
# Data Exploration
# data description
# Results are printed explicitly: a bare expression such as `data.shape`
# is only echoed in an interactive console and is silently discarded when
# the file is run as a script.
print(data.shape)      # dimension of the data set (rows, columns)
print(len(data))       # length of the data (number of rows)
data.info()            # data structure (info() prints its own report)
print(data.columns)    # column names
print(data.head(10))   # first 10 rows
print(data.tail(10))   # last 10 rows
# standardization
# Echo the variable-names table (only visible in an interactive console).
var_names
from scipy import stats

# Add a z-score column z_x6 ... z_x19 for each of the columns x6 ... x19,
# in ascending order.
for var_no in range(6, 20):
    raw_col = 'x' + str(var_no)
    data['z_' + raw_col] = stats.zscore(data[raw_col])

#Boxplot
import matplotlib.pyplot as plt

# Draw one box per standardized variable, side by side on a single axes.
# The original issued fourteen separate plt.boxplot() calls without opening
# a new figure or calling plt.show(); run as a script, every box lands on
# the same axes at position 1 and nothing is displayed.
box_vars = ['x' + str(var_no) for var_no in range(6, 20)]
fig, ax = plt.subplots(figsize=(12, 5))
# A 2-D array yields one box per column.
ax.boxplot(data[['z_' + name for name in box_vars]].to_numpy())
# Tick labels are set on the axes rather than through boxplot's `labels`
# keyword, which newer matplotlib releases deprecate.
ax.set_xticklabels(box_vars)
ax.set_ylabel('z-score')
plt.show()

# Observations recorded from the plots:
#there are outliers in x7
#there are outliers in x12
#there are outliers in x16
#there is an outlier in x18

#there are outliers in x7,x12,x16,x18

#x7
# Outlier detection on the standardized column z_x7 (1.5 * IQR rule).
z_x7_values = data['z_x7']

# 1st and 3rd quartiles in one call, plus the median
x7_q1, x7_q3 = np.quantile(z_x7_values, [0.25, 0.75])
x7_med = np.median(z_x7_values)

# interquartile range
x7_iqr = x7_q3 - x7_q1

# upper and lower whiskers: 1.5 * IQR beyond the quartiles
x7_upper_bound = x7_q3 + (1.5 * x7_iqr)
x7_lower_bound = x7_q1 - (1.5 * x7_iqr)
print(x7_iqr, x7_upper_bound, x7_lower_bound)

# Row positions (integer offsets, not Boolean masks) of the values lying
# on or beyond each whisker.
x7_upper_array = np.nonzero(z_x7_values.to_numpy() >= x7_upper_bound)[0]
x7_lower_array = np.nonzero(z_x7_values.to_numpy() <= x7_lower_bound)[0]

print(x7_upper_array)
print(x7_lower_array)

#x12
# Outlier detection on the standardized column z_x12 (1.5 * IQR rule).
z_x12_values = data['z_x12']

# 1st and 3rd quartiles in one call, plus the median
x12_q1, x12_q3 = np.quantile(z_x12_values, [0.25, 0.75])
x12_med = np.median(z_x12_values)

# interquartile range
x12_iqr = x12_q3 - x12_q1

# upper and lower whiskers: 1.5 * IQR beyond the quartiles
x12_upper_bound = x12_q3 + (1.5 * x12_iqr)
x12_lower_bound = x12_q1 - (1.5 * x12_iqr)
print(x12_iqr, x12_upper_bound, x12_lower_bound)

# Row positions (integer offsets, not Boolean masks) of the values lying
# on or beyond each whisker.
x12_upper_array = np.nonzero(z_x12_values.to_numpy() >= x12_upper_bound)[0]
x12_lower_array = np.nonzero(z_x12_values.to_numpy() <= x12_lower_bound)[0]

print(x12_upper_array)
print(x12_lower_array)

#x16
# Outlier detection on the standardized column z_x16 (1.5 * IQR rule).
z_x16_values = data['z_x16']

# 1st and 3rd quartiles in one call, plus the median
x16_q1, x16_q3 = np.quantile(z_x16_values, [0.25, 0.75])
x16_med = np.median(z_x16_values)

# interquartile range
x16_iqr = x16_q3 - x16_q1

# upper and lower whiskers: 1.5 * IQR beyond the quartiles
x16_upper_bound = x16_q3 + (1.5 * x16_iqr)
x16_lower_bound = x16_q1 - (1.5 * x16_iqr)
print(x16_iqr, x16_upper_bound, x16_lower_bound)

# Row positions (integer offsets, not Boolean masks) of the values lying
# on or beyond each whisker.
x16_upper_array = np.nonzero(z_x16_values.to_numpy() >= x16_upper_bound)[0]
x16_lower_array = np.nonzero(z_x16_values.to_numpy() <= x16_lower_bound)[0]

print(x16_upper_array)
print(x16_lower_array)

#x18
# Outlier detection on the standardized column z_x18 (1.5 * IQR rule).
z_x18_values = data['z_x18']

# 1st and 3rd quartiles in one call, plus the median
x18_q1, x18_q3 = np.quantile(z_x18_values, [0.25, 0.75])
x18_med = np.median(z_x18_values)

# interquartile range
x18_iqr = x18_q3 - x18_q1

# upper and lower whiskers: 1.5 * IQR beyond the quartiles
x18_upper_bound = x18_q3 + (1.5 * x18_iqr)
x18_lower_bound = x18_q1 - (1.5 * x18_iqr)
print(x18_iqr, x18_upper_bound, x18_lower_bound)

# Row positions (integer offsets, not Boolean masks) of the values lying
# on or beyond each whisker.
x18_upper_array = np.nonzero(z_x18_values.to_numpy() >= x18_upper_bound)[0]
x18_lower_array = np.nonzero(z_x18_values.to_numpy() <= x18_lower_bound)[0]

print(x18_upper_array)
print(x18_lower_array)
# Pool the flagged row positions found for x7, x12, x16 and x18.
outliers = np.concatenate((
    x7_upper_array, x7_lower_array,
    x12_upper_array, x12_lower_array,
    x16_upper_array, x16_lower_array,
    x18_upper_array, x18_lower_array,
))
print(outliers)
#removing the duplicates (np.unique also sorts the positions)
out = list(np.unique(outliers))
print(out)
#removing the outliers
# The arrays above hold row *positions*, while DataFrame.drop(index=...)
# matches index *labels*. Mapping through data.index keeps the drop correct
# even when the index is not the default 0..n-1 range.
data1 = data.drop(index=data.index[out])
# Regression analysis
# Ordinary least squares of standardized x19 on standardized x6 ... x18,
# fitted on the outlier-free rows in data1.
# The column list is written as one well-formed expression: in the original
# text the statement was split after `x=` and inside the 'z_x17' literal,
# which is a syntax error.
predictors = [
    'z_x6', 'z_x7', 'z_x8', 'z_x9', 'z_x10', 'z_x11', 'z_x12',
    'z_x13', 'z_x14', 'z_x15', 'z_x16', 'z_x17', 'z_x18',
]
x = data1[predictors]
y = data1[['z_x19']]
# statsmodels does not add an intercept automatically
x = sm.add_constant(x)
#Estimation
model01 = sm.OLS(y, x).fit()
# results
# Printed explicitly: a bare `model01.summary()` is only echoed in an
# interactive console and is discarded when run as a script.
print(model01.summary())

Stainless Steel Pipe Weight Per Meter and Pipe Thickness Chart in MM
No ratings yet
Stainless Steel Pipe Weight Per Meter and Pipe Thickness Chart in MM
4 pages
284 - en - 51 - en - Tozen Dolenex Rubber Flexible Joint We16-1405
No ratings yet
284 - en - 51 - en - Tozen Dolenex Rubber Flexible Joint We16-1405
1 page
[www.crackjee.xyz] Indefinite Integration
No ratings yet
[www.crackjee.xyz] Indefinite Integration
10 pages
Cover Data Dukung
No ratings yet
Cover Data Dukung
1 page
Merge_Sort with Graph
No ratings yet
Merge_Sort with Graph
6 pages
Investment Strategies
No ratings yet
Investment Strategies
2 pages
PD AdobeAnalytics 2022-12-15
No ratings yet
PD AdobeAnalytics 2022-12-15
9 pages
Final Aruba Instant On Optical Guide - 040324
No ratings yet
Final Aruba Instant On Optical Guide - 040324
26 pages
TC-H326S 配置33XIE+AV3.0
No ratings yet
TC-H326S 配置33XIE+AV3.0
4 pages
S4D480_EN_Col21
No ratings yet
S4D480_EN_Col21
270 pages
Classes: User Manual
No ratings yet
Classes: User Manual
4 pages
The Emerging Role of The CISO
No ratings yet
The Emerging Role of The CISO
7 pages
Thuraya Marinestar Firmware S1 1.1 Upgrade Userguide
No ratings yet
Thuraya Marinestar Firmware S1 1.1 Upgrade Userguide
10 pages
03 - Authentication-Registration
No ratings yet
03 - Authentication-Registration
14 pages
LED Flasher
No ratings yet
LED Flasher
4 pages
Auto Body Repair Technology: Ch. 15,16 Key Terms
No ratings yet
Auto Body Repair Technology: Ch. 15,16 Key Terms
1 page
Libreoffice Writer MCQ
100% (2)
Libreoffice Writer MCQ
6 pages
Design Core Competence Diagnosis: A Case From The Automotive Industry
No ratings yet
Design Core Competence Diagnosis: A Case From The Automotive Industry
15 pages
Water Softener Manual
No ratings yet
Water Softener Manual
24 pages
Operators in Oracle
No ratings yet
Operators in Oracle
12 pages
PMP s8 2016 v55 Quality
No ratings yet
PMP s8 2016 v55 Quality
54 pages
B.E. Electrical Engg Sem VII VIII
No ratings yet
B.E. Electrical Engg Sem VII VIII
99 pages
Mark Zuckerberg, Founder of Facebook
No ratings yet
Mark Zuckerberg, Founder of Facebook
2 pages
Testing U2
No ratings yet
Testing U2
14 pages
Bat 5
No ratings yet
Bat 5
15 pages
(FREE) Launch Easydiag Full Activation Step by Step
50% (2)
(FREE) Launch Easydiag Full Activation Step by Step
3 pages
A117 Ca910 - en P
No ratings yet
A117 Ca910 - en P
218 pages
Enterprise Resource Planning - Lecture Notes, Study Material and Important Questions, Answers
No ratings yet
Enterprise Resource Planning - Lecture Notes, Study Material and Important Questions, Answers
4 pages
TPMS
No ratings yet
TPMS
60 pages
Starship Manual
No ratings yet
Starship Manual
24 pages
The Subtle Art of Not Giving a F*ck: A Counterintuitive Approach to Living a Good Life
From Everand
The Subtle Art of Not Giving a F*ck: A Counterintuitive Approach to Living a Good Life
Mark Manson
4/5 (6441)
Principles: Life and Work
From Everand
Principles: Life and Work
Ray Dalio
4/5 (642)
Never Split the Difference: Negotiating As If Your Life Depended On It
From Everand
Never Split the Difference: Negotiating As If Your Life Depended On It
Chris Voss
4.5/5 (999)
The Hard Thing About Hard Things: Building a Business When There Are No Easy Answers
From Everand
The Hard Thing About Hard Things: Building a Business When There Are No Easy Answers
Ben Horowitz
4.5/5 (361)
The Gifts of Imperfection: Let Go of Who You Think You're Supposed to Be and Embrace Who You Are
From Everand
The Gifts of Imperfection: Let Go of Who You Think You're Supposed to Be and Embrace Who You Are
Brene Brown
4/5 (1174)
Elon Musk: Tesla, SpaceX, and the Quest for a Fantastic Future
From Everand
Elon Musk: Tesla, SpaceX, and the Quest for a Fantastic Future
Ashlee Vance
4.5/5 (581)
The Emperor of All Maladies: A Biography of Cancer
From Everand
The Emperor of All Maladies: A Biography of Cancer
Siddhartha Mukherjee
4.5/5 (298)
Fear: Trump in the White House
From Everand
Fear: Trump in the White House
Bob Woodward
3.5/5 (836)
Shoe Dog: A Memoir by the Creator of Nike
From Everand
Shoe Dog: A Memoir by the Creator of Nike
Phil Knight
4.5/5 (628)
The Unwinding: An Inner History of the New America
From Everand
The Unwinding: An Inner History of the New America
George Packer
4/5 (45)
Rise of ISIS: A Threat We Can't Ignore
From Everand
Rise of ISIS: A Threat We Can't Ignore
Jay Sekulow
3.5/5 (144)
Her Body and Other Parties: Stories
From Everand
Her Body and Other Parties: Stories
Carmen Maria Machado
4/5 (903)
Team of Rivals: The Political Genius of Abraham Lincoln
From Everand
Team of Rivals: The Political Genius of Abraham Lincoln
Doris Kearns Goodwin
4.5/5 (244)
Hidden Figures: The American Dream and the Untold Story of the Black Women Mathematicians Who Helped Win the Space Race
From Everand
Hidden Figures: The American Dream and the Untold Story of the Black Women Mathematicians Who Helped Win the Space Race
Margot Lee Shetterly
4/5 (1018)
The Yellow House: A Memoir (2019 National Book Award Winner)
From Everand
The Yellow House: A Memoir (2019 National Book Award Winner)
Sarah M. Broom
4/5 (100)
Sing, Unburied, Sing: A Novel
From Everand
Sing, Unburied, Sing: A Novel
Jesmyn Ward
4/5 (1267)
The World Is Flat 3.0: A Brief History of the Twenty-first Century
From Everand
The World Is Flat 3.0: A Brief History of the Twenty-first Century
Thomas L. Friedman
3.5/5 (2289)
Bad Feminist: Essays
From Everand
Bad Feminist: Essays
Roxane Gay
4/5 (1090)
Steve Jobs
From Everand
Steve Jobs
Walter Isaacson
4.5/5 (1138)
Devil in the Grove: Thurgood Marshall, the Groveland Boys, and the Dawn of a New America
From Everand
Devil in the Grove: Thurgood Marshall, the Groveland Boys, and the Dawn of a New America
Gilbert King
4.5/5 (279)
Manhattan Beach: A Novel
From Everand
Manhattan Beach: A Novel
Jennifer Egan
3.5/5 (919)
John Adams
From Everand
John Adams
David McCullough
4.5/5 (2546)
The Glass Castle: A Memoir
From Everand
The Glass Castle: A Memoir
Jeannette Walls
4.5/5 (1856)
A Heartbreaking Work Of Staggering Genius: A Memoir Based on a True Story
From Everand
A Heartbreaking Work Of Staggering Genius: A Memoir Based on a True Story
Dave Eggers
3.5/5 (233)
The Outsider: A Novel
From Everand
The Outsider: A Novel
Stephen King
4/5 (2884)
Angela's Ashes: A Memoir
From Everand
Angela's Ashes: A Memoir
Frank McCourt
4.5/5 (943)
A Tree Grows in Brooklyn
From Everand
A Tree Grows in Brooklyn
Betty Smith
4.5/5 (2033)
The Light Between Oceans: A Novel
From Everand
The Light Between Oceans: A Novel
M.L. Stedman
4.5/5 (815)
The Art of Racing in the Rain: A Novel
From Everand
The Art of Racing in the Rain: A Novel
Garth Stein
4/5 (4360)
The Perks of Being a Wallflower
From Everand
The Perks of Being a Wallflower
Stephen Chbosky
4.5/5 (4102)
Little Women
From Everand
Little Women
Louisa May Alcott
4.5/5 (2369)

Data Exploration and Regression in Python With HBAT Dataset

Uploaded by

Data Exploration and Regression in Python With HBAT Dataset

Uploaded by

#importing the libraries

#there are outliers in x7,x12,x16,x18

# finding the iqr region

# finding upper and lower whiskers

# Create arrays of Boolean values indicating the outlier rows

# finding the iqr region

# finding upper and lower whiskers

# Create arrays of Boolean values indicating the outlier rows

# finding the iqr region

# finding upper and lower whiskers

# Create arrays of Boolean values indicating the outlier rows

# finding the iqr region

# finding upper and lower whiskers

# Create arrays of Boolean values indicating the outlier rows

You might also like