Datamining
Datamining
# Program 1: association-rule mining with the Apriori algorithm (arules).
library(arules)

# Five market baskets; each character vector is one transaction.
dataset <- list(
  c("Bread", "Butter", "Milk"),
  c("Bread", "Butter"),
  c("Beer", "Cookies", "Diapers"),
  c("Milk", "Diapers", "Bread", "Butter"),
  c("Beer", "Diapers")
)

# Convert the list of baskets into an arules transactions object.
items1 <- transactions(dataset)
print(items1)

# Mine rules with at least 2 items, support >= 0.4 and confidence >= 0.7.
rules <- apriori(
  items1,
  parameter = list(support = 0.4, confidence = 0.7, minlen = 2, maxlen = 10)
)
inspect(rules)
output
library(arules)
> dataset=list(
+ c("Bread","Butter","Milk"),
+ c("Bread","Butter"),
+ c("Beer","Cookies","Diapers"),
+ c("Milk",'Diapers',"Bread","Butter"),
+ c("Beer","Diapers")
+)
> items1=transactions(dataset)
> print(items1)
transactions in sparse format with
5 transactions (rows) and
6 items (columns)
> rules=apriori(items1,parameter=list(supp=0.4,conf=0.7,minlen=2,maxlen=10))
Apriori
Parameter specification:
confidence minval smax arem aval originalSupport maxtime support minlen maxlen target ext
0.7 0.1 1 none FALSE TRUE 5 0.4 2 10 rules TRUE
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE
Absolute minimum support count: 2
Program2
# Program 2: k-means clustering of the values in kmeans.csv.
library(cluster)

# Load the data set to cluster.
items <- read.csv("E:/kmeans.csv")
summary(items)
plot(items)

# Partition the observations into 2 clusters. kmeans() picks random starting
# centres, so the cluster numbering can differ between runs.
data <- kmeans(items, 2)
print(data)

# Draw the observations grouped by their assigned cluster.
clusplot(items, data$cluster, color = TRUE, lines = 0, labels = 2)
output:
library(cluster)
> items <- read.csv("E:/kmeans.csv")
> summary(items)
Dataset
Min. : 3.00
1st Qu.: 4.25
Median :10.00
Mean :12.83
3rd Qu.:18.75
Max. :30.00
> plot(items)
> data <- kmeans(items,2)
> print(data)
K-means clustering with 2 clusters of sizes 3, 3
Cluster means:
Dataset
1 21.66667
2 4.00000
Clustering vector:
[1] 2 2 1 2 1 1
Available components:
# Program 3: agglomerative hierarchical clustering of a numeric vector.
library(cluster)

# Values to cluster.
items <- c(5, 10, 12, 3, 8)
summary(items)
plot(items)

# Pairwise Euclidean distances, then complete-linkage clustering.
data <- dist(items, method = "euclidean")
hier <- hclust(data, method = "complete")

# The dendrogram must be drawn before rect.hclust() can annotate it.
plot(hier)

# Cut the tree into 2 groups and outline them on the dendrogram.
grps <- cutree(hier, k = 2)
print(grps)
rect.hclust(hier, k = 2, border = "green")
output:
library(cluster)
> items <- c(5,10,12,3,8)
> summary(items)
Min. 1st Qu. Median Mean 3rd Qu. Max.
3.0 5.0 8.0 7.6 10.0 12.0
> plot(items)
> data <- dist(items,method="euclidean")
> hier <- hclust(data,method="complete")
> plot(hier)
> grps<-cutree(hier,k=2)
> print(grps)
[1] 1 2 2 1 2
> rect.hclust(hier,k=2,border="green")
Program4
# Program 4: naive Bayes classification of Dept from Maths and Science marks.
library(e1071)

data <- read.csv("E:/mark.csv")
head(data)
str(data)

# e1071 model using every other column as a predictor; predict on the
# training rows themselves.
model <- naiveBayes(Dept ~ ., data = data)
predictions <- predict(model, data)
print(predictions)

# Same model with the predictors named explicitly.
model1 <- naiveBayes(Dept ~ Maths + Science, data = data)
print(model1)

# naivebayes variant with kernel density estimation for the numeric
# predictors. The argument is spelled `usekernel`; a misspelled name is
# swallowed by `...` and the model silently stays Gaussian.
library(naivebayes)
model1 <- naive_bayes(Dept ~ Maths + Science, usekernel = TRUE, data = data)
print(model1)
output
library(e1071)
> data<-read.csv("E:/mark.csv")
> head(data)
Dept Maths Science
1 BCA 90 93
2 CS 98 78
3 BCA 60 50
4 Physics 95 67
5 CS 50 90
6 CS 65 70
> str(data)
'data.frame': 7 obs. of 3 variables:
$ Dept : chr "BCA" "CS" "BCA" "Physics" ...
$ Maths : int 90 98 60 95 50 65 87
$ Science: int 93 78 50 67 90 70 78
> model<-naiveBayes(Dept~.,data=data)
> predictions<-predict(model,data)
> print(predictions)
[1] CS Physics BCA Physics CS CS Physics
Levels: BCA CS Physics
> model1<-naiveBayes(Dept~Maths+Science,data=data)
>
> print(model1)
Call:
naiveBayes.default(x = X, y = Y, laplace = laplace)
A-priori probabilities:
Y
BCA CS Physics
0.2857143 0.4285714 0.2857143
Conditional probabilities:
Maths
Y [,1] [,2]
BCA 75 21.213203
CS 71 24.556058
Physics 91 5.656854
Science
Y [,1] [,2]
BCA 71.50000 30.405592
CS 79.33333 10.066446
Physics 72.50000 7.778175
> library(naivebayes)
> model1<-naive_bayes(Dept~Maths+Science,usekernal=T,data=data)
> print(model1)
Call:
naive_bayes.formula(formula = Dept ~ Maths + Science, data = data,
usekernal = T)
--------------------------------------------------------------------------------------------------------------
Laplace smoothing: 0
--------------------------------------------------------------------------------------------------------------
A priori probabilities:
BCA CS Physics
0.2857143 0.4285714 0.2857143
--------------------------------------------------------------------------------------------------------------
Tables:
--------------------------------------------------------------------------------------------------------------
::: Maths (Gaussian)
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
::: Science (Gaussian)
--------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------
>
Program5
# Program 5: classification tree predicting Play from the weather attributes.
library(party)
library(rpart)

data <- read.csv("E:/decision.csv")
head(data)
str(data)
plot(data)

data1 <- as.data.frame(data)

# Refer to columns by bare name so they are resolved through `data =`;
# `data1$col` terms in a formula bypass `data` and break predict() on new data.
# minsplit = 1, minbucket = 1 and cp = 0 let the tree grow fully on this
# tiny data set.
tree <- rpart(
  Play ~ Whether + Temperature + Windy,
  data = data1,
  method = "class",
  control = rpart.control(minsplit = 1, minbucket = 1, cp = 0)
)

plot(tree, main = "Decision Tree")
text(tree)
output:
library(party)
> library(rpart)
> data<-read.csv("E:/decision.csv")
> head(data)
Day Whether Temperature Windy Play
1 1 Rain Mild Weak No
2 2 Normal Hot Weak Yes
3 3 Wind Mild Strong Yes
4 4 Normal Cool Weak No
5 5 Rain Hot Strong No
> str(data)
'data.frame': 5 obs. of 5 variables:
$ Day : int 1 2 3 4 5
$ Whether : chr "Rain" "Normal" "Wind" "Normal" ...
$ Temperature: chr "Mild" "Hot" "Mild" "Cool" ...
$ Windy : chr "Weak" "Weak" "Strong" "Weak" ...
$ Play : chr "No" "Yes" "Yes" "No" ...
> plot(data)
> data1<-as.data.frame(data)
> tree<-
rpart(Play~data1$Whether+data1$Temperature+data1$Windy,data=data1,method="class",control=rpart.c
ontrol(minsplit=1,minbucket=1,cp=0))
> plot(tree,main="Decision Tree")
> text(tree)
Program6
# Print the summary of the fitted model stored in `relation`.
# NOTE(review): `relation` is not defined anywhere in this script. The recorded
# output shows the call lm(formula = Glosteral ~ Age), so it was presumably
# created as relation <- lm(Glosteral ~ Age) from two numeric vectors.
# TODO: confirm and restore the missing data and model lines.
summary(relation)
output:
Call:
lm(formula = Glosteral ~ Age)
Residuals:
1 2 3 4
-6.538 18.333 -1.282 -10.513
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 60.8974 20.1572 3.021 0.0943 .
Age 0.2564 0.5232 0.490 0.6725
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Program7
# BAR PLOT
# One bar per person: bar height is Age, each bar is labelled with its BP.
Age <- c(55, 57, 43, 35, 25)
BP <- c(140, 141, 130, 120, 120)
colors <- c("red", "yellow", "green", "violet", "cyan")

# names.arg labels the bars; passing BP as the second positional argument
# would set the bar *widths* instead. xlab and main are separate arguments.
barplot(
  Age,
  names.arg = BP,
  col = colors,
  ylab = "Age",
  xlab = "BP",
  main = "Bar Plot"
)
# BOX PLOT
# Horizontal box-and-whisker plot of six item values.
x <- c(12, 24, 50, 33, 28, 43)
boxplot(x, main = "Box Plot", xlab = "Items", horizontal = TRUE)

# HISTOGRAM
# First with all defaults, then again with colours and explicit labels.
x <- c(12, 24, 50, 33, 28, 43)
hist(x)
colors <- c("red", "yellow", "violet", "green", "cyan")
hist(x, main = "Histogram", xlab = "Items", col = colors)

# SCATTERPLOT
# Weight plotted against Age for five observations.
Age <- c(12, 8, 15, 7, 9)
Weight <- c(23, 12, 35, 21, 15)
plot(Age, Weight, main = "Scatter Plot", xlab = "Age", ylab = "Weight")

# PIECHART
# One slice per country, sized by the matching entry of x.
x <- c(10, 30, 12, 23, 21)
lbl <- c("US", "UK", "INDIA", "JAPAN", "FRANCE")
colors <- c("red", "yellow", "violet", "green", "cyan")
pie(x, labels = lbl, col = colors, main = "Pie Chart")
output: