Data Science Manual
Exp: 01 Working with Pandas DataFrames
Program:
import pandas as pd

# load data into a DataFrame
data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}
df = pd.DataFrame(data)
print(df.loc[0])
Output:
calories 420
duration 50
Name: 0, dtype: int64
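df.loc also accepts a list of labels or a slice, returning a DataFrame instead of a Series. A minimal sketch, reusing the df built above:

# select rows 0 and 1 by label; the result is a DataFrame
print(df.loc[[0, 1]])
# positional indexing with iloc behaves the same way on the default integer index
print(df.iloc[0])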
Exp: 02 Basic Plot Using Matplotlib
Program:
import matplotlib.pyplot as plt

a = [1, 2, 3, 4, 5]
b = [0, 0.6, 0.2, 15, 10, 8, 16, 21]
plt.plot(a)
# "o" is for circle markers and "r" is for red
plt.plot(b, "or")
plt.plot(list(range(0, 22, 3)))
plt.xlabel("day")
plt.ylabel("temp")
c = [4, 2, 6, 8, 3, 20, 13, 15]
plt.plot(c, label="4th rep")
# get the current axes
ax = plt.gca()
# take control of the individual boundary lines of the graph body
ax.spines["right"].set_visible(False)
ax.spines["top"].set_visible(False)
# fix the range (bounds) of the left boundary line
ax.spines["left"].set_bounds(-3, 40)
# set the interval at which the x-axis places its tick marks
plt.xticks(list(range(-3, 10)))
# set the interval at which the y-axis places its tick marks
plt.yticks(list(range(-3, 20, 3)))
# the legend states what each color signifies
ax.legend(["1st rep", "2nd rep", "3rd rep", "4th rep"])
# annotate writes text on the graph; xy gives the position
plt.annotate("Temperature v/s days", xy=(1.01, -2.15))
plt.title("all features discussed")
plt.show()
Output:
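The same plot can also be built through Matplotlib's object-oriented interface, which the plt.gca() call above already touches on. A minimal sketch, reusing the a and c series:

import matplotlib.pyplot as plt

a = [1, 2, 3, 4, 5]
c = [4, 2, 6, 8, 3, 20, 13, 15]
# create the figure and axes explicitly instead of relying on implicit state
fig, ax = plt.subplots()
ax.plot(a, label="1st rep")
ax.plot(c, label="4th rep")
ax.set_xlabel("day")
ax.set_ylabel("temp")
ax.legend()
plt.show()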
Exp: 03 Frequency Distributions, Averages, Variability
Program:
# program to get the average of a list
import numpy as np

# taking a list of elements
List1 = [2, 40, 2, 502, 177, 7, 9]
# calculate the average using average()
print(np.average(List1))
Output:
105.57142857142857
Output:
4.0
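Unlike np.mean(), np.average() also accepts per-element weights. A short sketch reusing List1, with an illustrative weight vector (the weights here are arbitrary):

import numpy as np

List1 = [2, 40, 2, 502, 177, 7, 9]
# arbitrary weights for illustration; np.average normalizes them internally
weights = [1, 1, 1, 2, 2, 1, 1]
print(np.average(List1, weights=weights))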
Program:
# program for a normal curve
import matplotlib.pyplot as plt
import numpy as np

mu, sigma = 0.5, 0.1
s = np.random.normal(mu, sigma, 1000)
# create the bins and histogram (the density keyword normalizes the counts)
count, bins, ignored = plt.hist(s, 20, density=True)
plt.show()
Output:
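To check the histogram against the theoretical distribution, the normal probability density can be drawn over the same bins; this is the standard NumPy documentation recipe, reusing mu, sigma and bins from the program above:

import numpy as np
import matplotlib.pyplot as plt

# theoretical normal density evaluated at the bin edges
plt.plot(bins,
         1 / (sigma * np.sqrt(2 * np.pi)) *
         np.exp(-((bins - mu) ** 2) / (2 * sigma ** 2)),
         linewidth=2, color="r")
plt.show()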
Exp: 04 Correlation and Scatter Plots
Program:
# program for correlation and scatter plots
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

y = pd.Series([1, 2, 3, 4, 3, 5, 4])
x = pd.Series([1, 2, 3, 4, 5, 6, 7])
correlation = y.corr(x)
print(correlation)
Output:
0.8603090020146067
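The experiment title also calls for a scatter plot; a minimal sketch visualizing the same x and y before reading the coefficient:

import matplotlib.pyplot as plt

# a strong positive correlation appears as an upward-sloping point cloud
plt.scatter(x, y)
plt.xlabel("x")
plt.ylabel("y")
plt.show()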
# correlation coefficient
import math

def correlationCoefficient(x, y, n):
    sum_x = 0
    sum_y = 0
    sum_xy = 0
    squareSum_x = 0
    squareSum_y = 0
    i = 0
    while i < n:
        sum_x = sum_x + x[i]
        sum_y = sum_y + y[i]
        sum_xy = sum_xy + x[i] * y[i]
        squareSum_x = squareSum_x + x[i] * x[i]
        squareSum_y = squareSum_y + y[i] * y[i]
        i += 1
    # formula for the Pearson correlation coefficient
    corr = (n * sum_xy - sum_x * sum_y) / math.sqrt(
        (n * squareSum_x - sum_x * sum_x) * (n * squareSum_y - sum_y * sum_y))
    return corr

x = [15, 18, 21, 24, 27]
y = [25, 25, 27, 31, 32]
n = len(x)
print("{0: .6f}".format(correlationCoefficient(x, y, n)))
Output:
0.953463
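The hand-rolled loop can be cross-checked with NumPy's vectorized np.corrcoef, which returns the full 2x2 correlation matrix:

import numpy as np

# the off-diagonal entry [0, 1] is the Pearson correlation between x and y
print(np.corrcoef(x, y)[0, 1])   # approximately 0.953463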
Exp: 05 Regression
Program:
import numpy as np
import matplotlib.pyplot as plt

def estimate_coef(x, y):
    # number of observations/points
    n = np.size(x)
    m_x = np.mean(x)
    m_y = np.mean(y)
    ss_xy = np.sum(y * x) - n * m_y * m_x
    ss_xx = np.sum(x * x) - n * m_x * m_x
    # calculate regression coefficients
    b_1 = ss_xy / ss_xx
    b_0 = m_y - b_1 * m_x
    return (b_0, b_1)

def plot_regression_line(x, y, b):
    plt.scatter(x, y, color="m", marker="o", s=30)
    # predicted response vector
    y_pred = b[0] + b[1] * x
    plt.plot(x, y_pred, color="g")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()

def main():
    # data/observations
    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
    b = estimate_coef(x, y)
    print("Estimated coefficients:\nb_0 = {}\nb_1 = {}".format(b[0], b[1]))
    plot_regression_line(x, y, b)

if __name__ == "__main__":
    main()
Output:
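As a sanity check, np.polyfit fits the same least-squares line in one call; a short sketch with the arrays from main():

import numpy as np

x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])
# polyfit returns coefficients highest degree first: slope b_1, then intercept b_0
b_1, b_0 = np.polyfit(x, y, 1)
print(b_0, b_1)   # should match estimate_coef(x, y)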
Exp: 06 Z-Test
Program:
import math
import numpy as np
from numpy.random import randn
from statsmodels.stats.weightstats import ztest

# generate a random array of 50 numbers with mean 110 and
# standard deviation 15/sqrt(50), similar to the IQ-score data assumed above
mean_iq = 110
sd_iq = 15 / math.sqrt(50)
alpha = 0.05
null_mean = 100
data = sd_iq * randn(50) + mean_iq
print("mean = %.2f stdv = %.2f" % (np.mean(data), np.std(data)))
ztest_Score, p_value = ztest(data, value=null_mean, alternative="larger")
if p_value < alpha:
    print("Reject Null Hypothesis")
else:
    print("Fail To Reject Null Hypothesis")
Output:
Reject Null Hypothesis
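The statistic that ztest computes can be reproduced by hand from the definition z = (sample mean - null mean) / (sample std / sqrt(n)); a sketch reusing data and null_mean, which should agree with ztest_Score up to floating-point rounding:

import numpy as np

n = len(data)
# sample standard deviation (ddof=1), as used by the z-test
z = (np.mean(data) - null_mean) / (np.std(data, ddof=1) / np.sqrt(n))
print(z)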
Exp: 07 T-Test
Program:
import numpy as np
from scipy import stats

N = 10
x = np.random.randn(N) + 2
y = np.random.randn(N)
var_x = x.var(ddof=1)
var_y = y.var(ddof=1)
# pooled standard deviation
SD = np.sqrt((var_x + var_y) / 2)
print("Standard deviation =", SD)
# calculate the t-statistic
tval = (x.mean() - y.mean()) / (SD * np.sqrt(2 / N))
# compare with the critical t-value
# degrees of freedom
dof = 2 * N - 2
# p-value after comparison with the t-statistic
pval = 1 - stats.t.cdf(tval, df=dof)
print("t = " + str(tval))
print("p = " + str(2 * pval))
# cross-check with scipy's built-in two-sample t-test
tval2, pval2 = stats.ttest_ind(x, y)
print("t = " + str(tval2))
print("p = " + str(pval2))
Output:
Standard deviation = 0.7642398582227466
t= 4.87688162540348
p= 0.0001212767169695983
t= 4.876881625403479
p= 0.00012127671696957205
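Note that stats.ttest_ind pools the two sample variances by default, matching the manual formula above; Welch's variant drops the equal-variance assumption:

from scipy import stats

# Welch's t-test: does not assume var_x == var_y
tval3, pval3 = stats.ttest_ind(x, y, equal_var=False)
print("t = " + str(tval3))
print("p = " + str(pval3))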
Exp: 08 ANOVA
Program:
# installing the package
install.packages("dplyr")
# loading the package
library(dplyr)
# variance in mean within group and between groups
boxplot(mtcars$disp ~ factor(mtcars$gear), xlab = "gear", ylab = "disp")
# step 1: set up the null hypothesis and the alternate hypothesis
# H0: mu1 = mu2 = mu3 (there is no difference between average displacement
# for different gears)
# H1: not all means are equal
# step 2: calculate the test statistic using the aov function
mtcars_aov <- aov(mtcars$disp ~ factor(mtcars$gear))
summary(mtcars_aov)
# step 3: calculate the F-critical value
# for a 0.05 significance level, alpha = 0.05
# step 4: compare the test statistic with the F-critical value and conclude;
# if p < alpha, reject the null hypothesis
Output:
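For comparison, the same one-way ANOVA can be run in Python with scipy.stats.f_oneway; a minimal sketch with hypothetical displacement samples standing in for the three gear groups (the values below are illustrative, not the mtcars data):

from scipy import stats

# hypothetical displacement samples for gears 3, 4 and 5 (illustrative only)
gear3 = [275.8, 360.0, 318.0, 304.0]
gear4 = [160.0, 146.7, 140.8, 121.0]
gear5 = [120.3, 95.1, 351.0, 145.0]
f_stat, p_value = stats.f_oneway(gear3, gear4, gear5)
print("F =", f_stat, "p =", p_value)   # reject H0 when p < alpha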
Exp: 09 Building and Validating Linear Models
Program:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston

sns.set(style="ticks", color_codes=True)
plt.rcParams["figure.figsize"] = (8, 5)
plt.rcParams["figure.dpi"] = 150
# load the Boston housing dataset
boston = load_boston()
print(boston.keys())
print(boston.DESCR)
df = pd.DataFrame(boston.data, columns=boston.feature_names)
# add the target (median home value) as the MEDV column
df["MEDV"] = boston.target
print(df.columns)
print(df.head())
# plotting a heatmap for the overall data set
sns.heatmap(df.corr(), square=True, cmap="RdYlGn")
# now let's plot a regression plot to see the correlation between RM and MEDV
sns.regplot(x="RM", y="MEDV", data=df)
plt.show()
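A caveat: load_boston was deprecated and is removed from scikit-learn 1.2 onward. On newer installations the same table can be pulled from OpenML instead; a sketch, assuming network access:

from sklearn.datasets import fetch_openml

# the Boston housing data hosted on OpenML; includes MEDV as a column
boston = fetch_openml(name="boston", version=1, as_frame=True)
df = boston.frame
print(df.columns)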
Exp: 10 Building and Validating Logistic Models
Program:
import statsmodels.api as sm
import pandas as pd

# loading the training dataset
df = pd.read_csv("logit_train1.csv", index_col=0)
xtrain = df[["gmat", "gpa", "work_experience"]]
ytrain = df[["admitted"]]
log_reg = sm.Logit(ytrain, xtrain).fit()
Output:
Optimization terminated successfully.
Current function value: 0.352707
Iterations 8
print(log_reg.summary())
Output:
# predicting on new data
df = pd.read_csv("logit_test1.csv", index_col=0)
xtest = df[["gmat", "gpa", "work_experience"]]
ytest = df["admitted"]
yhat = log_reg.predict(xtest)
prediction = list(map(round, yhat))
print("Actual values :", list(ytest.values))
print("Predictions :", prediction)
Output:
#testing the accuracy of the model
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(ytest, prediction)
print("Confusion matrix : \n", cm)
print("Test accuracy = ", accuracy_score(ytest, prediction))
Output:
Confusion matrix :
[[6 0]
 [2 2]]
Test accuracy = 0.8
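One caveat: sm.Logit as called above fits a model with no intercept, because statsmodels does not add a constant column automatically. A sketch of including one with sm.add_constant, assuming the same hypothetical training frames:

import statsmodels.api as sm

# prepend a constant column so the model also estimates an intercept
xtrain_const = sm.add_constant(xtrain)
log_reg_const = sm.Logit(ytrain, xtrain_const).fit()
print(log_reg_const.summary())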
Exp: 11 Time Series Analysis
Program:
import warnings
import itertools
import numpy as np
import matplotlib.pyplot as plt

warnings.filterwarnings("ignore")
plt.style.use("fivethirtyeight")
import pandas as pd
import statsmodels.api as sm
import matplotlib

matplotlib.rcParams["axes.labelsize"] = 14
matplotlib.rcParams["xtick.labelsize"] = 12
matplotlib.rcParams["ytick.labelsize"] = 12
matplotlib.rcParams["text.color"] = "k"

df = pd.read_excel("superstore.xls")
furniture = df.loc[df["Category"] == "Furniture"]
furniture["Order Date"].min(), furniture["Order Date"].max()
# (Timestamp('2014-01-06 00:00:00'), Timestamp('2017-12-30 00:00:00'))
cols = ["Row ID", "Order ID", "Ship Date", "Ship Mode", "Customer ID",
        "Customer Name", "Segment", "Country", "City", "State", "Postal Code",
        "Region", "Product ID", "Category", "Sub-Category", "Product Name",
        "Quantity", "Discount", "Profit"]
furniture.drop(cols, axis=1, inplace=True)
furniture = furniture.sort_values("Order Date")
furniture.isnull().sum()
# aggregate daily sales, then index the frame by order date
furniture = furniture.groupby("Order Date")["Sales"].sum().reset_index()
furniture = furniture.set_index("Order Date")
furniture.index
# resample to month-start frequency, taking the mean of daily sales
y = furniture["Sales"].resample("MS").mean()
y["2017":]
y.plot(figsize=(15, 6))
plt.show()
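With the series resampled to a regular monthly frequency, it can be split into trend, seasonal and residual components; a minimal sketch applying statsmodels' seasonal decomposition to the y computed above:

import statsmodels.api as sm
import matplotlib.pyplot as plt

# additive decomposition: y = trend + seasonality + residual
decomposition = sm.tsa.seasonal_decompose(y, model="additive")
decomposition.plot()
plt.show()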