R Record-1
L.R.G. GOVERNMENT ARTS COLLEGE FOR WOMEN
(AFFILIATED TO BHARATHIAR UNIVERSITY)
TIRUPUR-641604
DEPARTMENT OF COMPUTER SCIENCE
I M.SC COMPUTER SCIENCE
NAME :
REG.NO :
CERTIFICATE
L.R.G. GOVERNMENT ARTS COLLEGE FOR WOMEN
TIRUPUR-641604
NAME :
REGISTER NO:
CLASS :
This is to certify that this is a bonafide record of the practical work done by the above student in I-M.SC COMPUTER SCIENCE, PRACTICAL-III: DATA MINING USING R, during the academic year 2023-2024.
1 APRIORI ALGORITHM TO EXTRACT ASSOCIATION RULES OF DATA MINING
2 K-MEANS CLUSTERING
3 HIERARCHICAL CLUSTERING
4 CLASSIFICATION ALGORITHM
5 DECISION TREE
6 LINEAR REGRESSION
7 DATA VISUALIZATION
1. APRIORI ALGORITHM TO EXTRACT ASSOCIATION RULES
OF DATA MINING
# Loading Libraries
library(arules)
library(arulesViz)
library(RColorBrewer)
# import dataset
data('Groceries')
rules<-apriori(Groceries,parameter=list(supp=0.01,conf=0.2))
inspect(rules[1:10])
arules::itemFrequencyPlot(Groceries,topN=20,
   col=brewer.pal(8,'Pastel2'),
   main='Relative Item Frequency Plot',type='relative',
   ylab='Item Frequency(Relative)')
OUTPUT:
# Loading Libraries
>library(arules)
> library(arulesViz)
> library(RColorBrewer)
> data('Groceries')
> rules<-apriori(Groceries,parameter=list(supp=0.01,conf=0.2))
Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen target  ext
        0.2    0.1    1 none FALSE            TRUE       5    0.01      1     10  rules TRUE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE
> inspect(rules[1:10])
     lift     count
[1]  1.000000  2513
[2]  1.607682    99
[3]  1.916916   102
[4]  1.622385   114
[5]  1.727509   113
[6]  1.721356   106
[7]  1.573968   111
[8]  2.372268   140
> arules::itemFrequencyPlot(Groceries,topN=20,
+ col=brewer.pal(8,'Pastel2'),
+ main='Relative Item Frequency Plot',type='relative',
+ ylab='Item Frequency(Relative)')
Relative Item Frequency Plot
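The ten rules above come back in no particular order. A common follow-up is to sort the mined rules by lift and to filter on a consequent of interest; the sketch below is a minimal illustration that reuses the rules object from the apriori() call above, with 'whole milk' chosen only as an example item.
#sort the mined rules by lift and inspect the strongest ones
sorted_rules<-sort(rules,by='lift',decreasing=TRUE)
inspect(head(sorted_rules,5))
#keep only rules whose right-hand side is 'whole milk' (illustrative choice)
milk_rules<-subset(rules,rhs %in% 'whole milk')
inspect(head(milk_rules,5))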
2.K-MEANS CLUSTERING
#K-Means Clustering
library(cluster)
df=USArrests
dim(df)
head(df)
#remove missing values
df=na.omit(df)
dim(df)
df=scale(df)
head(df)
set.seed(1)
km=kmeans(df,centers=5,nstart=25)
print(km)
plot(df)
points(km$centers,col=1:5,pch=8,cex=2)
cnt=table(km$cluster)
print(cnt)
final_data=cbind(df,cluster=km$cluster)
head(final_data)
plot(final_data,cex=0.6,main="Final Data")
ag=aggregate(final_data,by=list(cluster=km$cluster),mean)
head(ag)
plot(ag,cex=0.6,main="Aggregate")
OUTPUT:
#K-means Clustering
> dim(df)
[1] 50 4
> head(df)
           Murder Assault UrbanPop Rape
Alabama      13.2     236       58 21.2
Alaska       10.0     263       48 44.5
Arizona       8.1     294       80 31.0
Arkansas      8.8     190       50 19.5
California    9.0     276       91 40.6
Colorado      7.9     204       78 38.7
> dim(df)
[1] 50 4
> head(df)
> print(km)
K-means clustering with 5 clusters of sizes 7, 10, 10, 11, 12

Cluster means:
  [means of Murder, Assault, UrbanPop and Rape for the 5 clusters]

Clustering vector:
  [cluster label (1-5) for each of the 50 states]

Available components:
[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
[6] "betweenss"    "size"         "iter"         "ifault"
> plot(df)
> points(km$centers,col=1:5,pch=8,cex=2)
> print(cnt)
 1  2  3  4  5 
 7 10 10 11 12 
> head(final_data)
> head(ag)
> plot(ag,cex=0.6,main="Aggregate")
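centers=5 above is fixed by hand. A quick sanity check is the elbow method: rerun kmeans() for a range of k on the same scaled df and plot the total within-cluster sum of squares, looking for the bend. A minimal sketch, assuming the df built above:
#elbow method: total within-cluster SS for k=1..10
set.seed(1)
wss=sapply(1:10,function(k) kmeans(df,centers=k,nstart=25)$tot.withinss)
plot(1:10,wss,type='b',pch=19,xlab='number of clusters k',ylab='total within-cluster SS',main='Elbow Method')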
3.HIERARCHICAL CLUSTERING
#Hierarchical Clustering
library(cluster)
df=USArrests
#remove missing values
df=na.omit(df)
df=scale(df)
head(df)
d=dist(df,method="euclidean")
#complete dendrogram
hc1=hclust(d,method="complete")
plot(hc1,cex=0.6,main="Complete Dendrogram",hang=-1)
#average dendrogram
hc2=hclust(d,method="average")
plot(hc2,cex=0.6,main="Average Dendrogram",hang=-1)
abline(h=3.0,col="green")
groups=cutree(hc2,k=4)
print(groups)
table(groups)
rect.hclust(hc2,k=4,border="red")
final_data=cbind(df,cluster=groups)
head(final_data)
plot(final_data,cex=0.6,main="Final Data")
OUTPUT:
#Hierarchical Clustering
> head(df)
> print(groups)
[cluster label (1-4) for each of the 50 states]
> table(groups)
groups
 1  2  3  4 
 7  1 12 30 
> rect.hclust(hc2,k=4,border="red")
> final_data=cbind(df,cluster=groups)
> head(final_data)
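The record picks the 'complete' and 'average' linkages by inspecting the dendrograms. A rough numeric comparison is the agglomerative coefficient from agnes() in the already-loaded cluster package, where values closer to 1 suggest stronger clustering structure; a minimal sketch, assuming the scaled df from above:
#compare linkage methods by agglomerative coefficient
for(m in c('average','single','complete','ward')){
  cat(m,':',round(agnes(df,method=m)$ac,3),'\n')
}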
4.CLASSIFICATION ALGORITHM
#Classification Algorithm
library(class)
data(iris)
dim(iris)
head(iris)
rand=sample(1:nrow(iris),0.9*nrow(iris))
head(rand)
#min-max normalisation method
nor<-function(x){
  return((x-min(x))/(max(x)-min(x)))
}
iris_norm=as.data.frame(lapply(iris[,c(1,2,3,4)],nor))
head(iris_norm)
#Train dataset
iris_train=iris_norm[rand,]
iris_train_target=iris[rand,5]
#Test dataset
iris_test=iris_norm[-rand,]
iris_test_target=iris[-rand,5]
dim(iris_train)
dim(iris_test)
model1=knn(train=iris_train,test=iris_test,cl=iris_train_target,k=7)
#Confusion Matrix
tab=table(model1,iris_test_target)
print(tab)
accuracy=function(x){
  sum(diag(x)/sum(rowSums(x)))*100
}
cat("Accuracy classifier=",accuracy(tab))
OUTPUT:
#Classification Algorithm
> dim(iris)
[1] 150 5
> head(iris)
> head(rand)
> nor<-function(x)
+{
+ return((x-min(x))/(max(x)-min(x)))
+}
> head(iris_norm)
> iris_train=iris_norm[rand,]
> iris_train_target=iris[rand,5]
> iris_test=iris_norm[-rand,]
> iris_test_target=iris[-rand,5]
> dim(iris_train)
[1] 135 4
> dim(iris_test)
[1] 15 4
> model1=knn(train=iris_train,test=iris_test,cl=iris_train_target,k=7)
> print(tab)
            iris_test_target
model1       setosa versicolor virginica
  setosa          6          0         0
  versicolor      0          6         1
  virginica       0          0         2
> accuracy=function(x)
+{
+ sum(diag(x)/sum(rowSums(x)))*100
+}
> cat("Accuracy classifier=",accuracy(tab))
Accuracy classifier= 93.33333
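k=7 above is an arbitrary choice. A simple way to check it, sketched below with the objects already built in this program, is to loop over a few odd values of k and report the test accuracy from the same accuracy() helper; the exact numbers depend on the random split drawn by sample().
#try several k values and report test accuracy for each
for(k in c(1,3,5,7,9,11)){
  pred=knn(train=iris_train,test=iris_test,cl=iris_train_target,k=k)
  cat('k=',k,'accuracy=',accuracy(table(pred,iris_test_target)),'\n')
}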
5.DECISION TREE
#Decision Tree
library(rpart)
data=iris
str(data)
head(data)
dtree=rpart(Sepal.Width~Sepal.Length+Petal.Width+Petal.Length+Species,data=iris,method="anova")
print(dtree)
plot(dtree)
text(dtree,use.n=TRUE,cex=.7)
adata<-data.frame(Species='versicolor',Sepal.Length=5.1,Petal.Length=4.5,Petal.Width=1.4)
cat("Predicted Value:\n")
pt=predict(dtree,adata)
print(pt)
plot(pt)
#creating the decision tree using classification
df=as.data.frame(data)
dt=rpart(Sepal.Width~Sepal.Length+Petal.Width+Petal.Length+Species,data=df,method="class")
print(dt)
plot(dt)
text(dt,use.n=TRUE,cex=.7)
OUTPUT:
> #Decision Tree
> head(data)
> print(dtree)
n= 150
> plot(dtree)
> text(dtree,use.n=TRUE,cex=.7)
Predicted Value:
       1 
2.805556 
> plot(pt)
> #creating the decision tree using classification
> print(dt)
n= 150

node), split, n, loss, yval, (yprob)
      * denotes terminal node
2) Petal.Width>=0.8 100 80 3 (0.01 0.03 0.03 0.03 0.08 0.05 0.09 0.14 0.09 0.2 0.07 0.08
0.04 0.03 0 0.01 0 0.02 0 0 0 0 0)
4) Sepal.Length< 6.45 65 55 2.8 (0.015 0.046 0.046 0.046 0.11 0.062 0.14 0.15 0.11 0.14
0.015 0.046 0.031 0.046 0 0 0 0 0 0 0 0 0)
8) Petal.Width< 1.95 56 47 2.7 (0.018 0.054 0.054 0.054 0.11 0.071 0.16 0.11 0.12 0.16
0.018 0.036 0.018 0.018 0 0 0 0 0 0 0 0 0)
16) Sepal.Length< 5.55 12 9 2.4 (0.083 0 0.17 0.25 0.25 0.083 0.083 0 0 0.083 0 0 0 0
0 0 0 0 0 0 0 0 0) *
17) Sepal.Length>=5.55 44 36 2.7 (0 0.068 0.023 0 0.068 0.068 0.18 0.14 0.16 0.18
0.023 0.045 0.023 0.023 0 0 0 0 0 0 0 0 0)
34) Petal.Width< 1.55 29 23 2.9 (0 0.1 0.034 0 0.069 0.1 0.1 0.17 0.21 0.17 0 0.034 0
0 0 0 0 0 0 0 0 0 0)
68) Sepal.Length>=5.95 15 11 2.9 (0 0.2 0.067 0 0.067 0.067 0 0.2 0.27 0.067 0
0.067 0 0 0 0 0 0 0 0 0 0 0) *
35) Petal.Width>=1.55 15 10 2.7 (0 0 0 0 0.067 0 0.33 0.067 0.067 0.2 0.067 0.067
0.067 0.067 0 0 0 0 0 0 0 0 0) *
3) Petal.Width< 0.8 50 41 3.4 (0 0 0.02 0 0 0 0 0 0.02 0.12 0.08 0.1 0.04 0.18 0.12 0.06
0.06 0.08 0.04 0.02 0.02 0.02 0.02)
> plot(dt)
> text(dt,use.n=TRUE,cex=.7)
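The base plot()/text() rendering above gets cramped for deeper trees. If the rpart.plot package is installed (an extra dependency, not used elsewhere in this record), both fitted trees can be drawn more legibly; a minimal sketch:
#nicer rendering of the two trees (assumes install.packages('rpart.plot') was run)
library(rpart.plot)
rpart.plot(dtree,main='Sepal.Width regression tree')
rpart.plot(dt,main='Sepal.Width classification tree')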
6.LINEAR REGRESSION
#Linear Regression
setwd("D:/R")
df=read.csv("h2.csv",header=TRUE)
print(df)
lr=lm(height~weight,data=df)
print(lr)
#Linear Regression
plot(df$height,df$weight,col="blue",main="Height_Weight Regression",cex=1.3,pch=15,xlab="height",ylab="weight")
print(summary(lr))
print(residuals(lr))
coeff=coefficients(lr)
eq=paste0("y = ",round(coeff[1],1)," + ",round(coeff[2],1),"*x")
print(eq)
#Linear Equation
new.weights=data.frame(weight=c(60,50))
print(new.weights)
df1=predict(lr,newdata=new.weights)
print(df1)
df2=data.frame(df1,new.weights)
names(df2)=c("height","weight")
print(df2)
df3=rbind(df,df2)
print(df3)
write.csv(df3,"h3.csv")
pie(table(df3$height))
OUTPUT:
> #Linear Regression
> setwd("D:/R")
> df=read.csv("h2.csv",header=TRUE)
> print(df)
  height weight
1    174     80
2    150     70
3    160     75
4    180     85
> lr=lm(height~weight,data=df)
> print(lr)
Call:
lm(formula = height ~ weight, data = df)

Coefficients:
(Intercept)       weight  
       4.80         2.08  
> print(summary(lr))
Call:
lm(formula = height ~ weight, data = df)

Residuals:
   1    2    3    4 
 2.8 -0.4 -0.8 -1.6 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)   4.8000    16.4463   0.292   0.7980  
weight        2.0800     0.2117   9.827   0.0102 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.366 on 2 degrees of freedom
Multiple R-squared:  0.9797,	Adjusted R-squared:  0.9696
F-statistic: 96.57 on 1 and 2 DF,  p-value: 0.0102
> print(residuals(lr))
   1    2    3    4 
 2.8 -0.4 -0.8 -1.6 
> print(eq)
[1] "y2.1*(4.8*x)"
> print(new.weights)
weight
1 60
2 50
> print(df1)
1 2
129.6 108.8
> print(df2)
height weight
1 129.6 60
2 108.8 50
> df3=rbind(df,df2)
> print(df3)
   height weight
1   174.0     80
2   150.0     70
3   160.0     75
4   180.0     85
11  129.6     60
21  108.8     50
> write.csv(df3,"h3.csv")
> pie(table(df3$height))
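predict() above returns only point estimates, and with four training rows the uncertainty is large. A minimal sketch of a follow-up, reusing lr and new.weights from above: request 95% prediction intervals from the same model, and overlay the fitted line on a scatter plot drawn with weight on the x-axis so that it matches the height~weight model.
#point predictions with 95% prediction intervals
pred_int=predict(lr,newdata=new.weights,interval='prediction',level=0.95)
print(pred_int)
#scatter of the training data with the fitted regression line
plot(df$weight,df$height,col='blue',pch=15,xlab='weight',ylab='height',main='Height_Weight Regression')
abline(lr,col='red')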
7.DATA VISUALIZATION
#Data Visualization
X=iris
dim(X)
summary(X)
head(X)
hist(X$Sepal.Length,main='Histogram',col='green')
barplot(X$Sepal.Length[1:10],main='Barplot',col='red',xlab='Sepal.Length')
pie(table(X$Sepal.Length),main='pie-chart')
pairs(X)
plot(X$Sepal.Length,main='plot-chart',col='blue')
boxplot(X,main='Boxplot',col='yellow')
OUTPUT:
> dim(X)
[1] 150 5
> summary(X)
  Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
       Species  
 setosa    :50  
 versicolor:50  
 virginica :50  
> head(X)
> hist(X$Sepal.Length,main='Histogram',col='green')
> barplot(X$Sepal.Length[1:10],main='Barplot',col='red',xlab='Sepal.Length')
> pie(table(X$Sepal.Length),main='pie-chart')
> pairs(X)
> plot(X$Sepal.Length,main='plot-chart',col='blue')
> boxplot(X,main='Boxplot',col='yellow')
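All the charts above use base graphics. If ggplot2 is installed (an extra dependency, not part of this record), two of them can be redrawn with grouping by Species, which the base versions do not show; a minimal sketch:
#ggplot2 versions with species colouring (assumes install.packages('ggplot2') was run)
library(ggplot2)
ggplot(iris,aes(x=Sepal.Length,y=Sepal.Width,colour=Species))+
  geom_point()+
  labs(title='Sepal dimensions by species')
ggplot(iris,aes(x=Species,y=Sepal.Length,fill=Species))+
  geom_boxplot()+
  labs(title='Sepal.Length by species')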