0% found this document useful (0 votes)
22 views

saurabh

Uploaded by

Aman Bansal
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
22 views

saurabh

Uploaded by

Aman Bansal
Copyright
© © All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 22

PROGRAM-1

OBJECTIVE- To get the input from the user and perform numerical operations
(MAX, MIN, AVG, SUM, SQRT, ROUND) in R.

Program-
# Program 1: basic numeric summaries of a vector.
# Lines starting with "#>" show the console output recorded for each call.

# Create the sample vector.
data <- c(23, 4, 56, 21, 34, 56, 73)

# Maximum value.
print(max(data))
#> [1] 73

# Minimum value.
print(min(data))
#> [1] 4

# Sum of all elements.
sum(data)
#> [1] 267

# Average (arithmetic mean).
print(mean(data))
#> [1] 38.14286

# Square root of a single number.
a <- 5
print(sqrt(a))
#> [1] 2.236068

# Round to the nearest whole number.
a <- 5.2
print(round(a))
#> [1] 5

PROGRAMMED BY- SAURABH MISHRA


Sample Output-

MAX & MIN-

SUM & AVG-

SQRT & ROUND-

PROGRAMMED BY- SAURABH MISHRA


PROGRAM-2
OBJECTIVE- To perform data import/export (.CSV, .XLS, .TXT) operations using
data frames in R.

Program-
# Program 2: import CSV, Excel and tab-separated text files into data frames.
# Each path is assembled as one string: a line break inside a quoted path
# would become part of the file name and the file would not be found.

# ---- .CSV ----
csv_path <- paste0(
  "C:/Users/saurabh/OneDrive/Desktop/College Work/5th-Semester/",
  "I.D.A.V Lab/business-financial-data-june-2023-quarter-csv.csv"
)
read.data <- read.csv(csv_path)
print(read.data)

# ---- .XLS ----
# Install readxl only when it is missing, so the script does not reinstall
# the package on every run.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

excel_data <- read_excel("C:/Users/saurabh/Downloads/file_example_XLS_10.xls")
print(excel_data)

# ---- .TXT ----
# Tab-separated text whose first row holds the column names.
txt_data <- read.table(
  "C:/Users/saurabh/OneDrive/Desktop/R_Language.txt",
  header = TRUE,
  sep = "\t"
)
# Print the text-file data (not the Excel data read earlier).
print(txt_data)
Sample Output-
CSV FILE-

PROGRAMMED BY- SAURABH MISHRA


XLS FILE-

TXT FILE-

PROGRAMMED BY- SAURABH MISHRA


PROGRAM-3

OBJECTIVE- To get the input matrix from the user and perform matrix addition,
subtraction, multiplication, inverse, transpose and division operations using
the vector concept in R.
Program-
# Program 3: read two matrices from the user and apply matrix operations.

# ---- Matrix creation ----
# readline() returns text, so the dimensions are converted to integers.
# The names `r` and `c` are kept; calls to c() still find the base function.
r <- as.integer(readline("Enter the number of rows:"))
c <- as.integer(readline("Enter the number of columns:"))
stopifnot(!is.na(r), !is.na(c), r > 0L, c > 0L)

# ---- Matrix values ----
# Read at most r * c numbers per matrix and insist on exactly that many, so
# matrix() never silently recycles a short input.
A <- scan(n = r * c)
B <- scan(n = r * c)
stopifnot(length(A) == r * c, length(B) == r * c)

M1 <- matrix(A, nrow = r, ncol = c, byrow = TRUE)
M2 <- matrix(B, nrow = r, ncol = c, byrow = TRUE)

# Matrix M1:
print(M1)
# Matrix M2:
print(M2)

# 1. Addition (element-wise)
print(M1 + M2)

# 2. Subtraction (element-wise)
print(M1 - M2)

# 3. Multiplication
# `*` multiplies matching elements; `%*%` is the true matrix product.
print(M1 * M2)
# Both matrices are r x c, so the matrix product exists only when r == c.
if (r == c) {
  print(M1 %*% M2)
} else {
  message("Matrix product M1 %*% M2 needs square matrices (rows == columns).")
}

# 4. Transpose
print(t(M1))
print(t(M2))

# 5. Division (element-wise)
print(M1 / M2)

# 6. Inverse
# solve() inverts a square matrix and signals an error for a singular one;
# that error is turned into a message so the script keeps running.
print_inverse <- function(m, label) {
  inv <- tryCatch(solve(m), error = function(e) NULL)
  if (is.null(inv)) {
    message(label, " has no inverse (not square or singular).")
  } else {
    print(inv)
  }
  invisible(inv)
}
print_inverse(M1, "M1")
print_inverse(M2, "M2")

PROGRAMMED BY- SAURABH MISHRA


Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAM-4

OBJECTIVE- To perform statistical operations (Mean, Median, Mode and


Standard deviation) using R.

Program-
# Program 4: mean, median, mode and standard deviation of a vector.

# Define the sample vector.
a <- c(23, 84, 16, 95, 23, 6, 41, 29, 6, 4, 6)

# 1. Mean
print(mean(a))

# 2. Median
print(median(a))

# 3. Mode
#' Most frequent value of a vector.
#'
#' @param a A vector.
#' @return The value that occurs most often in `a`; when several values tie,
#'   the one that appears first in `a` is returned.
getmode <- function(a) {
  uniqv <- unique(a)
  # match() maps each element to its position in `uniqv`; tabulate() counts
  # how often each position occurs; which.max() picks the first maximum.
  uniqv[which.max(tabulate(match(a, uniqv)))]
}

print(getmode(a))

# 4. Standard deviation (sample standard deviation, n - 1 denominator)
print(sd(a))

Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAM-5

OBJECTIVE- To perform data pre-processing operations: (1) handle missing data, (2)
min-max normalization.

Program-
# Program 5: data pre-processing - missing values and min-max normalization.

# ---- Handle missing data ----
# is.na() flags every missing entry (NA and NaN alike).
x <- c(NA, 3, 4, NA, NA, NA)
is.na(x)

# is.nan() flags only "not a number" results such as 0 / 0, not plain NA.
x <- c(NA, 3, 4, NA, NA, 0 / 0, 0 / 0)
is.nan(x)

# ---- Min-max normalization ----
# No add-on package is required: the scaling below uses base R only.
# NOTE(review): `0-15` and `0-12` evaluate to -15 and -12; they may have been
# meant as 0.15 and 0.12 - confirm against the intended data.
data <- data.frame(
  Var1 = c(120, 345, 145, 522, 596, 285, 21),
  Var2 = c(10, 15, 45, 22, 53, 28, 12),
  Var3 = c(-34, 0.05, 0-15, 0-12, -6, 0.85, 0.11)
)

#' Rescale a numeric vector to the range [0, 1].
#'
#' @param x A numeric vector with at least two distinct values.
#' @return A vector the same length as `x`, where the minimum maps to 0 and
#'   the maximum maps to 1.
MinMax <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

# Normalize every variable (column) separately. lapply() walks the columns of
# the data frame, so each variable keeps its own minimum and maximum.
Normalized_My_Data <- as.data.frame(lapply(data, MinMax))
head(Normalized_My_Data)

# Check the summary after normalization: each column should span 0 to 1.
summary(Normalized_My_Data)

PROGRAMMED BY- SAURABH MISHRA


Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAM-6
OBJECTIVE- To perform dimensionality reduction operation using PCA for
houses data set in R.

Program-
# Program 6: dimensionality reduction with principal component analysis.

# Load the built-in USArrests data and drop rows with missing values.
data("USArrests")
rawdf <- na.omit(USArrests)

# The columns are already named Murder, Assault, UrbanPop and Rape (in that
# order), so no renaming is needed.
head(rawdf)

# Run PCA on the cleaned data. Variables are centred and scaled to unit
# variance first because they are measured on very different scales.
arrests.pca <- prcomp(rawdf, center = TRUE, scale. = TRUE)

# Check the output of PCA: prcomp() returns the standard deviations (sdev),
# the rotation (loadings), the centring/scaling used and the scores (x).
names(arrests.pca)
print(arrests.pca)
summary(arrests.pca)

# Scree plot: variance captured by each principal component.
screeplot(arrests.pca, type = "lines", main = "Scree plot")

# Biplot of the first two components.
biplot(arrests.pca, scale = 0, cex = .7)

# The sign of a principal component is arbitrary. Flipping both the loadings
# and the scores gives an equivalent solution that is easier to read.
pca.out <- arrests.pca
pca.out$rotation <- -pca.out$rotation
pca.out$x <- -pca.out$x
biplot(pca.out, scale = 0, cex = .7)

# Loadings of the first two principal components.
pca.out$rotation[, 1:2]

Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAMMED BY- SAURABH MISHRA
PROGRAM-7

OBJECTIVE- To perform Simple Linear Regression with R.

Program-
# Observations: predictor values and the response measured at each of them.
x <- seq(1, 5, by = 1)
y <- c(2, 4, 5, 4, 5)

# Ordinary least-squares fit of y as a straight-line function of x.
model <- lm(y ~ x)

# Scatter plot of the raw observations.
plot(
  x, y,
  pch = 16,
  col = "blue",
  xlab = "Independent Variable",
  ylab = "Dependent Variable",
  main = "Simple Linear Regression"
)

# Overlay the fitted regression line on the scatter plot.
abline(model, col = "red")

Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAM-8
OBJECTIVE- To perform single logistic regression with R.

Program-
# Program 8: simple logistic regression with one predictor.

# Install the required packages only when they are missing, then load them.
# requireNamespace() checks availability without attaching the package.
for (pkg in c("caTools", "ggplot2")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(caTools)
library(ggplot2)

data <- read.csv("/gender_submission.csv")

# Column roles. The target is the binary outcome being modelled; the
# predictor must be a different column, otherwise the model only regresses
# the outcome on itself.
# NOTE(review): assumes the file has a 0/1 `Survived` column and a
# `PassengerId` column - confirm against the data and change as needed.
target_variable <- "Survived"
predictor_variable <- "PassengerId"
stopifnot(all(c(target_variable, predictor_variable) %in% names(data)))

# Optional recoding when the data has a 'Sex' column with the values "male"
# and "female" (adjust the condition to the unique values in the data):
# data$Sex <- ifelse(tolower(trimws(data$Sex)) %in% c("female", "male"),
#                    ifelse(tolower(trimws(data$Sex)) == "female", 1, 0), NA)
# Or drop the column entirely when it is not needed:
# data$Sex <- NULL

# Split the data into training and testing sets.
set.seed(123) # For reproducibility
# sample.split() is given the target so both sets keep a similar class ratio.
is_train <- sample.split(data[[target_variable]], SplitRatio = 0.7)
training_set <- data[is_train, , drop = FALSE]
testing_set <- data[!is_train, , drop = FALSE]

# Fit the logistic regression: target ~ predictor.
model <- glm(
  reformulate(predictor_variable, response = target_variable),
  family = binomial,
  data = training_set
)
summary(model)

# PROGRAMMED BY- SAURABH MISHRA

# Make predictions on the testing set (predicted probability of class 1).
predictions <- predict(model, newdata = testing_set, type = "response")
predicted_classes <- ifelse(predictions > 0.5, 1, 0)

# Evaluate the model: rows are actual classes, columns are predicted classes.
confusion_matrix <- table(
  Actual = testing_set[[target_variable]],
  Predicted = predicted_classes
)
print(confusion_matrix)

# Plot the observed outcomes against the predictor with the fitted logistic
# curve. The `.data` pronoun looks columns up by the names stored above.
ggplot(
  training_set,
  aes(x = .data[[predictor_variable]], y = .data[[target_variable]])
) +
  geom_point(aes(color = factor(.data[[target_variable]])), alpha = 0.5) +
  stat_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE,
    color = "blue"
  ) +
  labs(
    title = "Logistic Regression Curve",
    x = "Predictor Variable",
    y = "Probability of Target Variable",
    color = target_variable
  ) +
  theme_minimal()

PROGRAMMED BY- SAURABH MISHRA


Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAM – 10
OBJECTIVE – To perform association analysis on a given dataset and evaluate
its accuracy.
Program –
!pip install mlxtend --upgrade # Upgrade mlxtend to the latest version

import numpy as np

import pandas as pd

import seaborn as sns

from matplotlib import pyplot as plt

from sklearn.cluster import KMeans

from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import classification_report, accuracy_score

from mlxtend.frequent_patterns import apriori, association_rules

# Load dataset
data = pd.read_csv('/content/Groceries_dataset.csv')

# Remove duplicate rows
data.drop_duplicates(inplace=True)

# Convert 'Date' column to datetime; values that do not match the
# day-month-year format become NaT instead of raising an error
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y', errors='coerce')

# Check for missing values
data.isnull().sum()

# Extract date features (if Date column exists)
if 'Date' in data.columns:
    data['year'] = data['Date'].dt.year
    data['month'] = data['Date'].dt.month
    data['day'] = data['Date'].dt.day
    data['day_name'] = data['Date'].dt.day_name()

# Create item count per transaction
item_count = (
    data.groupby(['Member_number', 'itemDescription', 'Date'])['itemDescription']
    .count()
    .reset_index(name='Count')
)

# PROGRAMMED BY- SAURABH MISHRA

# Add a 'Transaction' column: one id per (member, date) pair, starting at 1
item_count['Transaction'] = item_count.groupby(['Member_number', 'Date']).ngroup() + 1

# Pivot the data: one row per transaction, one column per item
item_count_pivot = item_count.pivot_table(
    index='Transaction',
    columns='itemDescription',
    values='Count',
    aggfunc='sum',
).fillna(0)

# Convert to integer
item_count_pivot = item_count_pivot.astype('int32')

# Apriori Algorithm
# Collect the items of each transaction, then one-hot encode them. The
# result is cast to bool because apriori expects a boolean basket matrix.
basket = item_count.groupby('Transaction')['itemDescription'].apply(list).reset_index()
basket_encoded = (
    basket['itemDescription'].str.join('|').str.get_dummies('|').astype(bool)
)

# Generate frequent itemsets
frequent_itemsets = apriori(basket_encoded, min_support=0.01, use_colnames=True)

# Sort frequent itemsets
frequent_itemsets.sort_values('support', ascending=False).head(10)

# Generate association rules
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=0.5,
    support_only=False,
    num_itemsets=len(frequent_itemsets),
)

# Sort rules by confidence and display the top 10; 'confidence' is included
# in the selected columns so the sort key is visible in the output
rules.sort_values('confidence', ascending=False)[
    ['antecedents', 'consequents', 'support', 'lift', 'confidence']
].head(10)

PROGRAMMED BY- SAURABH MISHRA


Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAMMED BY- SAURABH MISHRA
PROGRAM-9
OBJECTIVE- To perform K-means clustering operation and visualize for iris data
set.

Program-

# Program 9: k-means clustering of the iris data with visualisation.

# `stats` ships with every R installation and cannot be installed from CRAN,
# so only the add-on packages are installed, and only when they are missing.
for (pkg in c("cluster", "ClusterR")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(stats)
library(cluster)
library(ClusterR)

# Remove the Species label (column 5) from the original dataset so that
# clustering uses only the four numeric measurements.
iris_1 <- iris[, -5]

# Fit the k-means model to the unlabelled data.
set.seed(240) # Setting seed
kmeans.re <- kmeans(iris_1, centers = 3, nstart = 20)
kmeans.re

# Cluster identification for each observation.
kmeans.re$cluster

# Confusion matrix: true species (rows) against assigned cluster (columns).
cm <- table(iris$Species, kmeans.re$cluster)
cm

# Model evaluation and visualization.
plot(iris_1[c("Sepal.Length", "Sepal.Width")])

plot(iris_1[c("Sepal.Length", "Sepal.Width")],
     col = kmeans.re$cluster)

# PROGRAMMED BY- SAURABH MISHRA

plot(iris_1[c("Sepal.Length", "Sepal.Width")],
     col = kmeans.re$cluster,
     main = "K-means with 3 clusters")

## Plotting cluster centers
kmeans.re$centers
kmeans.re$centers[, c("Sepal.Length", "Sepal.Width")]

# cex is font size, pch is symbol
points(kmeans.re$centers[, c("Sepal.Length", "Sepal.Width")],
       col = 1:3, pch = 8, cex = 3)

## Visualizing clusters
y_kmeans <- kmeans.re$cluster
clusplot(iris_1[, c("Sepal.Length", "Sepal.Width")],
         y_kmeans,
         lines = 0,
         shade = TRUE,
         color = TRUE,
         labels = 2,
         plotchar = FALSE,
         span = TRUE,
         main = paste("Cluster iris"),
         xlab = 'Sepal.Length',
         ylab = 'Sepal.Width')

Sample Output-

PROGRAMMED BY- SAURABH MISHRA


PROGRAMMED BY- SAURABH MISHRA

You might also like