0% found this document useful (0 votes)
27 views6 pages

Aerofit Case Study Analysis - Ipynb - Colaboratory

Uploaded by

Harsha Lanka
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
27 views6 pages

Aerofit Case Study Analysis - Ipynb - Colaboratory

Uploaded by

Harsha Lanka
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

10/25/23, 6:00 PM Aerofit Case Study analysis.ipynb - Colaboratory

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Location of the Aerofit treadmill customer dataset (CSV).
# Points straight at the CloudFront host; the previous value had a scraper
# proxy prefix ("") spliced into it and did not address the
# real file.
url = "https://d2beiqkhq929f0.cloudfront.net/public_assets/assets/000/001/125/original/aerofit_treadmill.csv?1639992749"
# Read the CSV located at `url` into a DataFrame.
data = pd.read_csv(url)

# Dimensions of the loaded table as (rows, columns).
data.shape

(180, 9)

# Preview the first few rows to see the columns and typical values.
data.head()

Product Age Gender Education MaritalStatus Usage Fitness Income Miles

0 KP281 18 Male 14 Single 3 4 29562 112

1 KP281 19 Male 15 Single 2 3 31836 75

2 KP281 19 Female 14 Partnered 4 3 30699 66

3 KP281 19 Male 12 Single 3 3 32973 85

Product Age Gender Education MaritalStatus Usage Fitness Income Miles

# Storage dtype of the 'Gender' column.
data['Gender'].dtypes

dtype('O')

# Storage dtype of every column in the table.
data.dtypes

Product object
Age int64
Gender object
Education int64
MaritalStatus object
Usage int64
Fitness int64
Income int64
Miles int64
dtype: object

# Summary statistics (count, mean, std, min, quartiles, ...) for the columns.
data.describe()

Age Education Usage Fitness Income Miles

count 180.000000 180.000000 180.000000 180.000000 180.000000 180.000000

mean 28.788889 15.572222 3.455556 3.311111 53719.577778 103.194444

std 6.943498 1.617055 1.084797 0.958869 16506.684226 51.863605

min 18.000000 12.000000 2.000000 1.000000 29562.000000 21.000000

25% 24.000000 14.000000 3.000000 3.000000 44058.750000 66.000000

50% 26.000000 16.000000 3.000000 3.000000 50596.500000 94.000000

75% 33.000000 16.000000 4.000000 4.000000 58668.000000 114.750000

# Tally how many rows carry each distinct 'Gender' value and print the result.
Gender_counts = data['Gender'].value_counts()
print(Gender_counts)

Male 104
Female 76
Name: Gender, dtype: int64

https://colab.research.google.com/drive/1HDCIdYeIZJKlt_2pHrisj8nmauAa53wp?authuser=0#scrollTo=qtC3d7NPoKDw&printMode=true 1/6
10/25/23, 6:00 PM Aerofit Case Study analysis.ipynb - Colaboratory
# Tally how many rows carry each distinct 'MaritalStatus' value and print it.
MaritalStatus_counts = data['MaritalStatus'].value_counts()
print(MaritalStatus_counts)

Partnered 107
Single 73
Name: MaritalStatus, dtype: int64

# Histogram of customer ages: 20 bins, raw counts on the y-axis.
# `sns.distplot` is deprecated and scheduled for removal in seaborn v0.14.0;
# `sns.histplot` is the axes-level replacement. It draws no KDE curve unless
# asked to, so the old `kde=False` argument is no longer needed.
sns.histplot(data['Age'], bins=20, color='skyblue')
plt.title('Age Distribution')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()

<ipython-input-29-92f34d3d87f0>:2: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(data['Age'], bins=20, kde=False, color='skyblue')

# Histogram of customer income: 20 bins, raw counts on the y-axis.
# `sns.distplot` is deprecated and scheduled for removal in seaborn v0.14.0;
# `sns.histplot` is the axes-level replacement. It draws no KDE curve unless
# asked to, so the old `kde=False` argument is no longer needed.
sns.histplot(data['Income'], bins=20, color='salmon')
plt.title('Income Distribution')
plt.xlabel('Income ($)')
plt.ylabel('Frequency')
plt.show()

https://colab.research.google.com/drive/1HDCIdYeIZJKlt_2pHrisj8nmauAa53wp?authuser=0#scrollTo=qtC3d7NPoKDw&printMode=true 2/6
10/25/23, 6:00 PM Aerofit Case Study analysis.ipynb - Colaboratory

<ipython-input-28-8cd2e39d1613>:2: UserWarning:

`distplot` is a deprecated function and will be removed in seaborn v0.14.0.

Please adapt your code to use either `displot` (a figure-level function with
similar flexibility) or `histplot` (an axes-level function for histograms).

For a guide to updating your code to use the new functions, please see
https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

sns.distplot(data['Income'], bins=20, kde=False, color='salmon')

# Age spread of the buyers of each treadmill model, one box per product.
product_age_ax = sns.boxplot(
    data=data,
    x='Product',
    y='Age',
    palette='Set3',
)
# Label through the Axes that seaborn drew on rather than the pyplot
# "current axes" shortcuts; both address the same Axes here.
product_age_ax.set(
    title='Product Purchased vs. Age',
    xlabel='Product Purchased',
    ylabel='Age',
)
plt.show()

# Correlation heatmap
# Pairwise correlation only makes sense for numeric columns. The table also
# holds object columns (Product, Gender, MaritalStatus), and a bare
# `data.corr()` relies on a deprecated default that silently drops them
# (newer pandas raises instead). Select the numeric columns explicitly so
# the result does not depend on the installed pandas version.
numeric_columns = data.select_dtypes(include='number')
correlation_matrix = numeric_columns.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap')
plt.show()

https://colab.research.google.com/drive/1HDCIdYeIZJKlt_2pHrisj8nmauAa53wp?authuser=0#scrollTo=qtC3d7NPoKDw&printMode=true 3/6
10/25/23, 6:00 PM Aerofit Case Study analysis.ipynb - Colaboratory

<ipython-input-32-43edec4bd922>:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version
correlation_matrix = data.corr()

# Pairplot for numerical variables, coloured by the product purchased.
pair_grid = sns.pairplot(data, hue='Product', palette='Set1')
# `pairplot` builds a grid of many Axes, so `plt.title` would label only the
# last subplot drawn. Put the heading on the figure instead; `y` slightly
# above 1 keeps it clear of the top row of panels.
pair_grid.figure.suptitle('Pairplot for Numerical Variables', y=1.02)
plt.show()

https://colab.research.google.com/drive/1HDCIdYeIZJKlt_2pHrisj8nmauAa53wp?authuser=0#scrollTo=qtC3d7NPoKDw&printMode=true 4/6
10/25/23, 6:00 PM Aerofit Case Study analysis.ipynb - Colaboratory

# Missing values: number of null entries in each column.
# `isna` is the pandas alias of `isnull`; summing the boolean mask counts the
# True cells column by column.
missing_values = data.isna().sum()
print(missing_values)

Product 0
Age 0
Gender 0
Education 0
MaritalStatus 0
Usage 0
Fitness 0
Income 0
Miles 0
dtype: int64

# Outlier check via boxplot: 'Age' in the first slot of a 1x3 layout.
plt.figure(figsize=(12, 5))
age_ax = plt.subplot(1, 3, 1)
sns.boxplot(x=data['Age'], color='skyblue', ax=age_ax)
age_ax.set_title('Age Boxplot')

Text(0.5, 1.0, 'Age Boxplot')

# Outlier check via boxplot: 'Income' in the second slot of a 1x3 layout.
plt.figure(figsize=(12, 5))
income_ax = plt.subplot(1, 3, 2)
sns.boxplot(x=data['Income'], color='salmon', ax=income_ax)
income_ax.set_title('Income Boxplot')

https://colab.research.google.com/drive/1HDCIdYeIZJKlt_2pHrisj8nmauAa53wp?authuser=0#scrollTo=qtC3d7NPoKDw&printMode=true 5/6
10/25/23, 6:00 PM Aerofit Case Study analysis.ipynb - Colaboratory

Text(0.5, 1.0, 'Income Boxplot')

# Outlier detection: one boxplot per variable, laid out side by side in a
# single 1x3 figure.
plt.figure(figsize=(12, 5))

# (column to plot, box colour, panel title), in left-to-right order.
boxplot_panels = (
    ('Age', 'skyblue', 'Age Boxplot'),
    ('Income', 'salmon', 'Income Boxplot'),
    ('Usage', 'lightgreen', 'Usage Boxplot'),
)

for position, (column, color, title) in enumerate(boxplot_panels, start=1):
    plt.subplot(1, 3, position)
    sns.boxplot(x=data[column], color=color)
    plt.title(title)

plt.tight_layout()
plt.show()

https://colab.research.google.com/drive/1HDCIdYeIZJKlt_2pHrisj8nmauAa53wp?authuser=0#scrollTo=qtC3d7NPoKDw&printMode=true 6/6

You might also like