Data Analysis Report

Uploaded by

thaikhang01122007

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PPTX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

10 views74 pages

Data Analysis Report

Uploaded by

thaikhang01122007

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PPTX, PDF, TXT or read online on Scribd

You are on page 1/ 74

Data Analysis Report

Generated from Jupyter Notebook

Code Cell
• import pandas as pd
• import numpy as np
• import matplotlib.pyplot as plt
• import seaborn as sns
• import pylab
• import warnings
• %matplotlib inline
• sns.set(style="darkgrid",font_scale=1.5)
• pd.set_option("display.max.columns",None)
Code Cell
• from google.colab import files

• files.upload()
Code Cell
• df = pd.read_csv("https://raw.githubusercontent.com/amankharwal/Website-data/master/CarPrice.csv")
Code Cell
• df.head()
Code Cell
• df.tail()
Code Cell
• df.shape
Code Cell
• df.info()
Code Cell
• df.describe()
Code Cell
• df.isnull().sum()
Code Cell
• print("Duplicate Values =",df.duplicated().sum())
Code Cell
• df.select_dtypes(include=["int","float"]).head()
Markdown Cell
• # **DATA CLEANING**
Code Cell
• Company_Name = df["CarName"].apply(lambda x: x.split(" ")[0])
• df.insert(2,"CompanyName",Company_Name)

• df.drop(columns=["CarName"],inplace=True)
Code Cell
• df.head()
Code Cell
• def replace(a,b):
• df["CompanyName"].replace(a,b,inplace=True)

• replace('maxda','mazda')
• replace('porcshce','porsche')
• replace('toyouta','toyota')
• replace('vokswagen','volkswagen')
Code Cell
• df["CompanyName"].unique()
Markdown Cell
• # **EDA**
Code Cell
• plt.figure(figsize=(20, 6))

• plt.subplot(1, 2, 1)
• sns.distplot(df["price"], color="red", kde=True)
• plt.title("Car Price Distribution", fontweight="black", pad=20, fontsize=20)

• plt.subplot(1, 2, 2)
• sns.boxplot(y=df["price"], palette="Set2")
Code Cell
• df["price"].agg(["min","mean","median","max","std","skew"]).to_frame().T
Code Cell
• plt.figure(figsize=(14,6))
• counts = df["CompanyName"].value_counts()
• sns.barplot(x=counts.index, y=counts.values)
• plt.xlabel("Car Company")
• plt.ylabel("Total No. of cars sold")
• plt.title("Total Cars produced by Companies", pad=20, fontweight="black", fontsize=20)
• plt.xticks(rotation=90)
• plt.show()
Code Cell
• df[df["CompanyName"]=="renault"]
Code Cell
• df[df["CompanyName"]=="mercury"]
Code Cell
• df[df["CompanyName"]=="porshe"]
Code Cell
• def clean_company_names(df, column):
• df[column] = df[column].str.lower()
• df[column] = df[column].replace({
• 'porshe': 'porsche',
• 'vw': 'volkswagen',
• })
• return df

• df = clean_company_names(df,
Code Cell
• df["fueltype"].unique()
Code Cell
• def categorical_visualization(cols):
• plt.figure(figsize=(20,10))
• plt.subplot(1,3,1)
• sns.countplot(x=cols,data=df,palette="Set2",order=df[cols].value_counts().index)
• plt.title(f"{cols} Distribution",pad=10,fontweight="black",fontsize=18)
• plt.xticks(rotation=90)
Code Cell
• df["aspiration"].unique()
Code Cell
• categorical_visualization("aspiration")
Code Cell
• categorical_visualization("doornumber")
Code Cell
• categorical_visualization("carbody")
Code Cell
• categorical_visualization("drivewheel")
Code Cell
• categorical_visualization("enginelocation")
Code Cell
• df[df["enginelocation"]=="rear"]
Code Cell
• categorical_visualization("enginetype")
Code Cell
• df[df["enginetype"]=="rotor"]
Code Cell
• df[df["enginetype"]=="dohcv"]
Code Cell
• categorical_visualization("cylindernumber")
Code Cell
• df[df["cylindernumber"]=="three"]
Code Cell
• df[df["cylindernumber"]=="twelve"]
Code Cell
• categorical_visualization("fuelsystem")
Code Cell
• df[df["fuelsystem"]=="mfi"]
Code Cell
• df[df["fuelsystem"]=="spfi"]
Code Cell
• categorical_visualization("symboling")
Code Cell
• def scatter_plot(cols):
• x=1
• plt.figure(figsize=(15,6))
• for col in cols:
• plt.subplot(1,3,x)
• sns.scatterplot(x=col,y="price",data=df,color="blue")
• plt.title(f"{col} vs
Code Cell
• scatter_plot(["carlength","carwidth","carheight"])
Code Cell
• scatter_plot(["enginesize","boreratio","stroke"])
Code Cell
• scatter_plot(["compressionratio","horsepower","peakrpm"])
Code Cell
• def scatter_plot(cols):
• q_low = df["price"].quantile(0.01)
• q_hi = df["price"].quantile(0.99)
• df_filtered = df[(df["price"] > q_low) & (df["price"] < q_hi)]
• x=1
• plt.figure(figsize=(15,6))
• for col in cols:
• plt.subplot(1,2,x)
Code Cell
• scatter_plot(["wheelbase","curbweight"])
Code Cell
• scatter_plot(["citympg","highwaympg"])
Code Cell
• f = round(df.groupby(["CompanyName"])["price"].agg(["mean"]),2).T
• f
Code Cell
• df = df.merge(f.T,how="left",on="CompanyName")
Code Cell
• bins = [0,10000,20000,40000]
• cars_bin=['Budget','Medium','Highend']
• df['CarsRange'] = pd.cut(df['mean'],bins,right=False,labels=cars_bin)
• df.head()
Code Cell
• new_df = df[['fueltype','aspiration','doornumber','carbody','drivewheel','enginetype','cylindernumber','fuelsystem'
• ,'wheelbase','carlength','carwidth','curbweight','enginesize','boreratio','horsepower','citympg','highwaympg',
• 'price','CarsRange']]
Code Cell
• new_df.head()
Code Cell
• new_df = pd.get_dummies(columns=["fueltype","aspiration","doornumber","carbody","drivewheel","enginetype",
• "cylindernumber","fuelsystem","CarsRange"],data=new_df)
Code Cell
• new_df.head()
Code Cell
• scaler = StandardScaler()
Code Cell
• num_cols = ['wheelbase','carlength','carwidth','curbweight','enginesize','boreratio','horsepower',
• 'citympg','highwaympg']

• new_df[num_cols] = scaler.fit_transform(new_df[num_cols])
Code Cell
• new_df.head()
Code Cell
• x = new_df.drop(columns=["price"])
• y = new_df["price"]
Code Cell
• x.shape
Code Cell
• y.shape
Code Cell
• x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
Code Cell
• print("x_train - > ",x_train.shape)
• print("x_test - > ",x_test.shape)
• print("y_train - > ",y_train.shape)
• print("y_test - > ",y_test.shape)
Markdown Cell
• # **MODEL BUILDING**
Code Cell
• training_score = []
• testing_score = []
Code Cell
• def model_prediction(model):
• model.fit(x_train,y_train)
• x_train_pred = model.predict(x_train)
• x_test_pred = model.predict(x_test)
• a = r2_score(y_train,x_train_pred)*100
• b = r2_score(y_test,x_test_pred)*100
• training_score.append(a)
• testing_score.append(b)
Code Cell
• model_prediction(LinearRegression())
Code Cell
• model_prediction(DecisionTreeRegressor())
Code Cell
• model_prediction(RandomForestRegressor())
Code Cell
• models = ["Linear Regression","Decision Tree","Random Forest"]

• df = pd.DataFrame({"Algorithms":models,
• "Training Score":training_score,
• "Testing Score":testing_score})
• df
Code Cell
• df.plot(x="Algorithms",y=["Training Score","Testing Score"], figsize=(16,6),kind="bar",
• title="Performance Visualization of Different Models",colormap="Set1")
• plt.show()

List of Car Dimensions (RTL)
100% (1)
List of Car Dimensions (RTL)
12 pages
Porsche by Robert Walker
No ratings yet
Porsche by Robert Walker
67 pages
Porsche Engineering
100% (2)
Porsche Engineering
40 pages
2022 06 MHI2 Password List V4.0
100% (1)
2022 06 MHI2 Password List V4.0
31 pages
Dissertation Report On Automobile Sector
No ratings yet
Dissertation Report On Automobile Sector
46 pages
Model Military International I166 02.2020
No ratings yet
Model Military International I166 02.2020
68 pages
Linear Regression
100% (1)
Linear Regression
16 pages
Case Study 1 Porsche Questions and Answe
No ratings yet
Case Study 1 Porsche Questions and Answe
6 pages
Air Compressor
No ratings yet
Air Compressor
16 pages
Project 8 Predictive Analytics - Ipynb - Colaboratory
No ratings yet
Project 8 Predictive Analytics - Ipynb - Colaboratory
8 pages
SVM (Support Vector Machine) For Classification - by Aditya Kumar - Towards Data Science
100% (1)
SVM (Support Vector Machine) For Classification - by Aditya Kumar - Towards Data Science
28 pages
Model
No ratings yet
Model
164 pages
Porsche
No ratings yet
Porsche
30 pages
Car Price Prediction
No ratings yet
Car Price Prediction
72 pages
All Car Brands List and Logos - Stock Photos: Cart My Account
No ratings yet
All Car Brands List and Logos - Stock Photos: Cart My Account
1 page
Volkswagen Case Study International Finance
No ratings yet
Volkswagen Case Study International Finance
15 pages
EDA Withoutcode
No ratings yet
EDA Withoutcode
36 pages
Data Analysis
No ratings yet
Data Analysis
58 pages
Task 3 Car Price Prediction Using Machine Learning
No ratings yet
Task 3 Car Price Prediction Using Machine Learning
30 pages
Practical Example Full Notes
No ratings yet
Practical Example Full Notes
48 pages
Car Price Prediction 1
No ratings yet
Car Price Prediction 1
24 pages
City Cycle Fuel Consumption 2024
No ratings yet
City Cycle Fuel Consumption 2024
23 pages
Aayushi Bda File
No ratings yet
Aayushi Bda File
41 pages
Car Price Prediction
No ratings yet
Car Price Prediction
35 pages
Internship
No ratings yet
Internship
23 pages
Python Codes
No ratings yet
Python Codes
17 pages
EDS - Python Cheat Sheet
0% (1)
EDS - Python Cheat Sheet
3 pages
Overview Winter Tires 996986
No ratings yet
Overview Winter Tires 996986
2 pages
Trilokesh Assignment
No ratings yet
Trilokesh Assignment
15 pages
Lec ExploratoryDataAnalysis1Unit5Part1
No ratings yet
Lec ExploratoryDataAnalysis1Unit5Part1
22 pages
Machine Learning With Python - Part-2
No ratings yet
Machine Learning With Python - Part-2
27 pages
AI-MAJOR-AUGUST - Aryal Ashish
No ratings yet
AI-MAJOR-AUGUST - Aryal Ashish
16 pages
Data Vizualization - Jupyter Notebook
No ratings yet
Data Vizualization - Jupyter Notebook
20 pages
GmPrac1 - Jupyter Notebook
No ratings yet
GmPrac1 - Jupyter Notebook
11 pages
Xii Project PDF
No ratings yet
Xii Project PDF
19 pages
Company Case - Porsche
100% (1)
Company Case - Porsche
3 pages
Data Mining
No ratings yet
Data Mining
10 pages
Note
No ratings yet
Note
9 pages
Dav Week8 240953580
No ratings yet
Dav Week8 240953580
15 pages
Quikr Car Price Prediction Using Linear Regression 1717999953
No ratings yet
Quikr Car Price Prediction Using Linear Regression 1717999953
12 pages
Exp 5 Exploratory Data Analysis SDK Ok
No ratings yet
Exp 5 Exploratory Data Analysis SDK Ok
13 pages
Elite Sports Cars Eda
No ratings yet
Elite Sports Cars Eda
9 pages
Laptop Price Prediction
No ratings yet
Laptop Price Prediction
15 pages
Car Price Prediction Using ML
No ratings yet
Car Price Prediction Using ML
11 pages
Data Preparation-All Pds
No ratings yet
Data Preparation-All Pds
15 pages
Machine Learning Project 1690186790
No ratings yet
Machine Learning Project 1690186790
18 pages
Problem Statement Is To Predict Price Column Based On Data With 24 Columns With Over 200 Data Entries Using Linear Regression
No ratings yet
Problem Statement Is To Predict Price Column Based On Data With 24 Columns With Over 200 Data Entries Using Linear Regression
5 pages
Engo 645
No ratings yet
Engo 645
9 pages
Data Wrangling
No ratings yet
Data Wrangling
24 pages
PRJ Car Price Prediction For Data Science
No ratings yet
PRJ Car Price Prediction For Data Science
10 pages
Intro To Exploratory Data Analysis Eda in Python
No ratings yet
Intro To Exploratory Data Analysis Eda in Python
7 pages
Data Frames and Charts 2: 2.1 Dealing With Missing Values
No ratings yet
Data Frames and Charts 2: 2.1 Dealing With Missing Values
12 pages
Case Study
No ratings yet
Case Study
21 pages
2
No ratings yet
2
6 pages
Case Worksheet Volkswagen AG
50% (2)
Case Worksheet Volkswagen AG
3 pages
Data Analysis: Data Preparation
No ratings yet
Data Analysis: Data Preparation
9 pages
Practical 2 .Ipynb - Colab
No ratings yet
Practical 2 .Ipynb - Colab
9 pages
Eda Notes
No ratings yet
Eda Notes
4 pages
Data Clearning
No ratings yet
Data Clearning
7 pages
Report
No ratings yet
Report
4 pages
Submitted By:-Shaikshahanaafroz - Cms20Mba093: 1. Identify The Shape of The Data
No ratings yet
Submitted By:-Shaikshahanaafroz - Cms20Mba093: 1. Identify The Shape of The Data
6 pages
Introduction To Python - Minor Project
No ratings yet
Introduction To Python - Minor Project
5 pages
Data Analytics Using Python
No ratings yet
Data Analytics Using Python
7 pages
#1 - Skill Builds - Data Analysis With Python
No ratings yet
#1 - Skill Builds - Data Analysis With Python
3 pages
Mohy - Jupyter Notebook
No ratings yet
Mohy - Jupyter Notebook
3 pages
DS On MTCARS Solutions
No ratings yet
DS On MTCARS Solutions
3 pages
Statisitics Project 3
No ratings yet
Statisitics Project 3
22 pages
C01 MiniCase Porsche
No ratings yet
C01 MiniCase Porsche
16 pages
Statisitics Project 7
No ratings yet
Statisitics Project 7
22 pages
UCD Linear Reg2
No ratings yet
UCD Linear Reg2
3 pages
Car Mock - ML Ans
No ratings yet
Car Mock - ML Ans
6 pages
Xətti Reqressiya Modelinin Qurulması
No ratings yet
Xətti Reqressiya Modelinin Qurulması
4 pages
Brake Pad List
100% (1)
Brake Pad List
5 pages
Porsche
No ratings yet
Porsche
14 pages
Numpy,,Pandas (24.4.25)
No ratings yet
Numpy,,Pandas (24.4.25)
1 page
Jazziiii Volkswagen
No ratings yet
Jazziiii Volkswagen
31 pages
Binning and Normalization Activity
No ratings yet
Binning and Normalization Activity
2 pages
Cars
No ratings yet
Cars
11 pages
Porsche Boxster and Cayman
No ratings yet
Porsche Boxster and Cayman
10 pages
Porsche Casestudy
No ratings yet
Porsche Casestudy
2 pages
GMS Group 18 Porsche
No ratings yet
GMS Group 18 Porsche
31 pages
【605】The One With Joey's Porsche
No ratings yet
【605】The One With Joey's Porsche
23 pages
Porsche Service - Imagebrochure
No ratings yet
Porsche Service - Imagebrochure
17 pages
English Project Assessment Cover Page Document in White Purple Fun Style
No ratings yet
English Project Assessment Cover Page Document in White Purple Fun Style
24 pages
MC 10208683 0001
No ratings yet
MC 10208683 0001
9 pages
PCM 5.X Overview
No ratings yet
PCM 5.X Overview
19 pages
Pune CustomerData
No ratings yet
Pune CustomerData
15 pages
Best Car Brands in Germany - Google Search
No ratings yet
Best Car Brands in Germany - Google Search
1 page
Essential n8n Playbook
From Everand
Essential n8n Playbook
Leandro Calado
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet

Data Analysis Report

Uploaded by

Data Analysis Report

Uploaded by

Data Analysis Report

Generated from Jupyter Notebook

You might also like