0% found this document useful (0 votes)
12 views

Decision Tree

Uploaded by

Anand Kumar
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views

Decision Tree

Uploaded by

Anand Kumar
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 2

# Install necessary packages
# Only packages that are not already installed are fetched, so re-running the
# script does not download and reinstall everything each time.
required_pkgs <- c("party", "rpart", "rpart.plot")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  # `dependencies` is spelled out in full; the abbreviated `dep` only worked
  # through partial argument matching.
  install.packages(missing_pkgs, dependencies = TRUE)
}

# Load required libraries


library(party)
library(rpart)
library(rpart.plot)

# Read Cardio dataset
# file.choose() opens an interactive file picker; select the Cardio CSV file
# (comma-separated, with a header row).
Cardio <- read.csv(file.choose(), sep = ",", header = TRUE)
head(Cardio) # Display the first few rows of the dataset
View(Cardio) # View the dataset in a separate window
summary(Cardio) # Per-column summary statistics
class(Cardio) # Check the class of the dataset
str(Cardio) # Display structure of the dataset
# Check for missing values: report the number of NAs per column. Printing
# is.na(Cardio) directly emits one logical per cell, which is unreadable for
# anything but a tiny table.
colSums(is.na(Cardio))

# Factor the NSP column to create labels for different categories
# NSP codes 1, 2 and 3 are mapped to descriptive labels in the new column
# NSPF; any other code becomes NA. The label "HAVE CHANCE" must stay on one
# line: a string literal that wraps across lines embeds a newline in the label.
Cardio$NSPF <- factor(
  Cardio$NSP,
  levels = c(1, 2, 3),
  labels = c("NORMAL", "HAVE CHANCE", "HAVE PROBABILITY")
)
str(Cardio) # Check the structure of the dataset after modification
View(Cardio) # View the dataset again after modification

# Correlation between all numeric variables (excluding the derived factor
# column NSPF)
# NSPF is dropped by name rather than by a hard-coded position, so the call
# keeps working if the CSV has a different number of columns. cor() requires
# every remaining column to be numeric.
cor(Cardio[, setdiff(names(Cardio), "NSPF"), drop = FALSE])

# Randomly assign every row to group 1 (training, probability 0.8) or
# group 2 (testing, probability 0.2), then split the data accordingly.
set.seed(1234) # Fixed seed so the same split is produced on every run
ind <- sample(
  2,
  size = nrow(Cardio),
  replace = TRUE,
  prob = c(0.8, 0.2)
)
is_train <- ind == 1 # ind only holds 1 or 2, so !is_train selects group 2
Cardio_train <- Cardio[is_train, ] # Training data
Cardio_test <- Cardio[!is_train, ] # Testing data

# Build a decision tree model using the rpart method
# Classification tree (method = "class") predicting NSPF from LB, AC and FM,
# fitted on the training rows only.
tree <- rpart(NSPF ~ LB + AC + FM, data = Cardio_train, method = "class")

# Plot the decision tree. extra = 104 prints the probability of every class
# plus the percentage of observations in each node. extra = 106 shows the
# second class only and is meant for binary responses, whereas NSPF has three
# levels.
rpart.plot(tree, type = 5, extra = 104)

# Evaluate the rpart model on the held-out test rows
pred <- predict(tree, Cardio_test, type = "class") # Predicted class labels

# Confusion matrix: predicted labels in rows, observed NSPF in columns
tab <- table(pred, Cardio_test$NSPF)
tab

# Accuracy (%): correctly classified rows (the diagonal) over all test rows
n_correct <- sum(diag(tab))
acc <- n_correct / sum(tab) * 100
acc

# Build a decision tree using the 'party' package (conditional inference tree)
# Same formula and training data as the rpart model above, so the two
# accuracies can be compared directly.
tree1 <- ctree(NSPF ~ LB + AC + FM, data = Cardio_train)
plot(tree1) # Plot the tree

# Make predictions with the conditional inference tree model
Cardio_predict <- predict(tree1, Cardio_test)

# Confusion matrix: predicted labels in rows, observed NSPF in columns.
# The surrounding parentheses print the value as well as assigning it.
(Cardio_predict_table <- table(Cardio_predict, Cardio_test$NSPF))

# Accuracy (%): diagonal of the confusion matrix over its grand total
(Cardio_performance <-
  sum(diag(Cardio_predict_table)) / sum(Cardio_predict_table) * 100)

# Mushroom dataset (for further analysis)
# file.choose() opens an interactive file picker; select the mushroom CSV.
# stringsAsFactors = TRUE converts every character column to a factor.
# NOTE(review): the variable name `data` is the same as utils::data(); it is
# kept unchanged because code beyond this section may refer to it.
data <- read.csv(file.choose(), sep = ",", stringsAsFactors = TRUE)
View(data) # View the dataset

You might also like