BHMC17 P5.ipynb - Colaboratory
import numpy as np
import pandas as pd
import seaborn as sns

from google.colab import drive
drive.mount('/content/drive')

data = pd.read_csv("/EconomiesOfScale.csv")
Mounted at /content/drive
data.head()
   Number of Units  Manufacturing Cost
0         1.000000           95.066056
1         1.185994           96.531750
2         1.191499           73.661311
3         1.204771           95.566843
4         1.298773           98.777013
data.describe()
# Count missing values per column; show only columns that have any
nans = pd.isnull(data).sum()
nans[nans > 0]
data.shape[0]
1000
ax = sns.boxplot(x=data["Manufacturing Cost"])
https://colab.research.google.com/drive/1Uvk4Nqweukf7QH3oYbfyTY417obmtB4J#printMode=true 1/4
10/14/23, 12:20 PM BHMC17 p5.ipynb - Colaboratory
ax = sns.boxplot(x=data['Number of Units'])
sns.jointplot(x='Number of Units', y='Manufacturing Cost', data=data)
<seaborn.axisgrid.JointGrid at 0x7cfbc13ba6b0>
array([[0. , 0.9383257 ],
[0.02066596, 0.95664687],
[0.02127763, 0.67076638],
...,
[0.86454312, 0.07467234],
[0.87752219, 0.06422889],
[1. , 0.01934721]])
scaled_data
   Number of Units  Manufacturing Cost
0         0.000000            0.938326
1         0.020666            0.956647
2         0.021278            0.670766
3         0.022752            0.944586
4         0.033197            0.984713
y = scaled_data.pop('Manufacturing Cost')
y
0 0.938326
1 0.956647
2 0.670766
3 0.944586
4 0.984713
...
995 0.048188
996 0.094207
997 0.074672
998 0.064229
999 0.019347
Name: Manufacturing Cost, Length: 1000, dtype: float64
X = scaled_data.values
"""
Split the dataset into training set and test set with an 80-20 ratio
"""
from sklearn.model_selection import train_test_split
seed=1
X_train, X_test, \
y_train, y_test = train_test_split(X, y, test_size=0.2, \
random_state=42)
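As a quick sanity check on the 80/20 split, the resulting shapes can be verified directly. This sketch uses synthetic arrays of the same sizes as the notebook's data (1000 samples, one feature after popping the target); the values are illustrative:

```python
import numpy as np
from sklearn.model_selection import train_test_split

# Stand-in arrays shaped like the notebook's data.
X = np.random.rand(1000, 1)
y = np.random.rand(1000)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

print(X_train.shape, X_test.shape)  # 800 training samples, 200 test samples
```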
from sklearn.svm import SVR

regr = SVR(epsilon=0.2)
regr.fit(X_train, y_train)
SVR(epsilon=0.2)
y_pred = regr.predict(X_test)
y_pred
array([..., 0.58535064, 0.21132973, 0.70114225, 0.31394558, 0.19646868,
0.28550935, 0.46230228, 0.23054964, 0.3178442 , 0.21526703,
0.20025726, 0.20334417, 0.2047899 , 0.41879638, 0.23031656,
0.37798776, 0.24097249, 0.22005137, 0.19537004, 0.28626906,
0.28661788, 0.20441796, 0.5164567 , 0.22013279, 0.28129508,
0.20147005, 0.42688768, 0.29052581, 0.42638852, 0.27804957,
0.22929735, 0.22977071, 0.19732925, 0.19497802, 0.27538463,
0.2633768 , 0.25082279, 0.23453376, 0.31433468, 0.19467478,
0.25353211, 0.19456455, 0.39709234, 0.43353714, 0.19685745,
0.1971824 , 0.26981381, 0.22775389, 0.29543255, 0.20733488,
0.20571743, 0.22817938, 0.23200193, 0.33702665, 0.22039579,
0.23372122, 0.19661119, 0.22950817, 0.19593826, 0.39894177,
0.25449299, 0.32918061, 0.23097683, 0.2329515 , 0.28591771,
0.2110974 , 0.25549309, 0.19736443, 0.29471497, 0.30469764,
0.22748822, 0.22111173, 0.25231059, 0.2048678 , 0.19524356,
0.21631618, 0.20047142, 0.29288796, 0.22982797, 0.23354053,
0.46075801, 0.36576062, 0.31622165, 0.59955219, 0.41707837,
0.49931476, 0.29957463, 0.19680618, 0.28884013, 0.21141338,
0.21829503, 0.19566166, 0.30106666, 0.19659288, 0.27753212,
0.20592251, 0.34307049, 0.23110011, 0.25478658, 0.22832641,
0.24935498, 0.28069803, 0.19986757, 0.45245932, 0.20906462,
0.21711763, 0.19448755, 0.20520075, 0.205105 , 0.20446054,
0.20282835, 0.2849949 , 0.20061093, 0.27949631, 0.28022113,
0.26771314, 0.24630999, 0.37452241, 0.2290563 , 0.2430412 ,
0.26757145, 0.29705325, 0.2948896 , 0.19535961, 0.20187177,
0.24093193, 0.20266503, 0.26391159, 0.2341701 , 0.19928893,
0.35138871, 0.26921077, 0.26709635, 0.22872646, 0.19508277,
0.23028492, 0.19709374, 0.20392945, 0.32127952, 0.31694141,
0.25830457, 0.27435644, 0.31842808, 0.21207435, 0.40956217])
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
rmse
0.0822903295452384
mse
0.006771698336663936
r2_score(y_test, y_pred)
0.4700318323625766
The R² value is approximately 0.47, which is reasonably encouraging for a first model. This work could be improved by tuning the SVR hyperparameters (C, epsilon, kernel) and by trying out other regression algorithms.
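One way to "try out other algorithms" is to fit several regressors on the same split and compare their R² scores. A hedged sketch below: the alternative model (`RandomForestRegressor`) and the synthetic decaying-cost data are illustrative choices, not taken from the notebook.

```python
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Synthetic stand-in for the cost curve (hypothetical, not the real CSV):
# manufacturing cost decays with the number of units, plus noise.
rng = np.random.default_rng(1)
X = rng.uniform(0.0, 1.0, size=(1000, 1))
y = np.exp(-3.0 * X[:, 0]) + rng.normal(0.0, 0.05, size=1000)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

results = {}
for model in (SVR(epsilon=0.2), RandomForestRegressor(random_state=0)):
    model.fit(X_train, y_train)
    results[type(model).__name__] = r2_score(y_test, model.predict(X_test))

print(results)
```

Looping over interchangeable estimators like this works because scikit-learn regressors share the same `fit`/`predict` interface.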