0% found this document useful (0 votes)
18 views

SVM

Uploaded by

Anand Kumar
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
18 views

SVM

Uploaded by

Anand Kumar
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 2

# Loading necessary libraries for SVM and data manipulation

library(e1071) # Support Vector Machine (svm) implementation

# Install ISLR only when it is missing: calling install.packages()
# unconditionally re-downloads the package on every run of the script.
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR")
}
library(ISLR) # Datasets (the Auto data frame is used later in this script)

# Load the iris dataset, a 3-class classification problem, and inspect it
data(iris)
View(iris) # Opens the data viewer (interactive sessions only)
str(iris)  # 150 obs.: 4 numeric measurements plus the Species factor

# Build a training subset of the iris data (80% of rows, drawn at random).
# NOTE(review): the original comment said "first 100 samples", but the code
# draws 80% of all 150 rows; the code is treated as authoritative here.
set.seed(123) # Seed the RNG so the random split is reproducible, matching
              # the seeding convention used for the Auto split below
ran <- sample(nrow(iris), 0.8 * nrow(iris)) # Row indices for training (120 of 150)
train_data <- iris[ran, ] # Training subset of the dataset
str(train_data)           # Inspect the structure of the training data

# Fit an SVM with a radial (RBF) kernel on the iris training subset
svm_model <- svm(
  Species ~ .,
  data   = train_data,
  kernel = "radial",
  cost   = 5,
  gamma  = 0.4
)

# Auto-print the fitted model: shows the call and the chosen parameters
svm_model

# Number of support vectors retained per class by the fitted model
summary(svm_model)$nSV

# Work with the Auto dataset shipped with the ISLR package
data(Auto)
summary(Auto) # Per-column summaries of the Auto data
str(Auto)     # Column types and dimensions
View(Auto)    # Data viewer (interactive sessions only)

# Derive a binary target: "High" when mpg exceeds the median mpg, else "Low"
mpg_median <- median(Auto$mpg)
Auto$mpg_high <- as.factor(ifelse(Auto$mpg > mpg_median, "High", "Low"))

# Confirm the new mpg_high column was added as a factor
str(Auto)

# Split the Auto data into 70% training and 30% testing rows
set.seed(123) # Fix the RNG so the split is reproducible
n_rows <- nrow(Auto)
# sample() truncates the non-integer size (0.7 * 392 -> 274 rows)
train_index <- sample(n_rows, size = 0.7 * n_rows)
train_data <- Auto[train_index, ]  # Training set
test_data  <- Auto[-train_index, ] # Held-out testing set

# Train three SVM models with varying cost and gamma parameters.
#
# The response mpg_high was derived directly from mpg, so mpg is excluded
# from the predictors: leaving it in lets every model trivially recover
# the median threshold (data leakage) and makes the comparison meaningless.
# (The original also had comment text spilling onto bare lines, which was
# a syntax error; those continuations are folded back into comments here.)

# Model 1: radial kernel, low cost (0.1) and low gamma (0.01)
svm_model1 <- svm(mpg_high ~ . - mpg, data = train_data,
                  kernel = "radial", cost = 0.1, gamma = 0.01)
# Check number of support vectors for Model 1
cat("Model 1 support vectors:", summary(svm_model1)$nSV, "\n")

# Model 2: radial kernel, higher cost (10) and higher gamma (0.1)
svm_model2 <- svm(mpg_high ~ . - mpg, data = train_data,
                  kernel = "radial", cost = 10, gamma = 0.1)
# Check number of support vectors for Model 2
cat("Model 2 support vectors:", summary(svm_model2)$nSV, "\n")

# Model 3: radial kernel, cost 10, default gamma (1 / number of features)
svm_model3 <- svm(mpg_high ~ . - mpg, data = train_data,
                  kernel = "radial", cost = 10)
# Check number of support vectors for Model 3
cat("Model 3 support vectors:", summary(svm_model3)$nSV, "\n")

# Evaluate each fitted model on the held-out test set

# Class predictions for the test rows (30% not seen during training)
pred1 <- predict(svm_model1, test_data)
pred2 <- predict(svm_model2, test_data)
pred3 <- predict(svm_model3, test_data)

# Accuracy = fraction of test rows whose predicted label matches the truth
actual <- test_data$mpg_high
accuracy1 <- mean(pred1 == actual) # Model 1 accuracy
accuracy2 <- mean(pred2 == actual) # Model 2 accuracy
accuracy3 <- mean(pred3 == actual) # Model 3 accuracy

# Report the test-set accuracy of each model
cat("Model 1 accuracy:", accuracy1, "\n")
cat("Model 2 accuracy:", accuracy2, "\n")
cat("Model 3 accuracy:", accuracy3, "\n")

# Detailed performance metrics (confusion matrix) via caret and gmodels
library(gmodels) # CrossTable()
library(caret)   # confusionMatrix()

# Model 1: confusion matrix with sensitivity, specificity, accuracy, etc.
confusionMatrix(data = pred1, reference = test_data$mpg_high)
# Model 1: cell-by-cell cross-tabulation of predicted vs actual labels
CrossTable(x = pred1, y = test_data$mpg_high)

# Sensitivity, specificity, and accuracy calculations:


# Sensitivity = TP / (TP + FN) -> True Positive Rate
# Specificity = TN / (TN + FP) -> True Negative Rate
# Accuracy = (TP + TN) / (TP + TN + FP + FN)
# Error Rate = (FP + FN) / (TP + TN + FP + FN)
# Precision (Positive Predictive Value) = TP / (TP + FP)
# Recall = TP / (TP + FN) -> identical to sensitivity (two names, same metric)

# These metrics are available directly in the confusion matrix and CrossTable output.

You might also like