Lab File AD PDF
Lab File AD PDF
of
DATA ANALYTICS USING R (150701)
Submitted By:
Anirban Das
(0901CS211140)
JULY-DEC 2024
3
Experiment = 1
# 1. Data types ----
# One sample value for each basic atomic type, created first and shown after.
num_var <- 10.5
int_var <- 5L # the L suffix stores the value as an integer, not a double
char_var <- "Hello, R!"
log_var <- TRUE

print(paste("Numeric:", num_var))
print(paste("Integer:", int_var))
print(paste("Character:", char_var))
print(paste("Logical:", log_var))

# 2. Variables ----
# Two operands reused by every arithmetic example below.
a <- 5
b <- 10

# 3. Basic operations ----
# Compute all four results up front, then report them in the same order.
sum_result <- a + b
sub_result <- a - b
mul_result <- a * b
div_result <- a / b

print(paste("Sum:", sum_result))
print(paste("Subtraction:", sub_result))
print(paste("Multiplication:", mul_result))
print(paste("Division:", div_result))
4
OUTPUT :
5
Experiment = 2
OUTPUT :
File created
6
Experiment = 3
# Show the untouched data set before any transformation.
# NOTE(review): `data` and the dplyr verbs must already be available here;
# neither is created in this part of the file.
print("Original Data:")
print(data)

# 1. Filtering ----
# Keep only the rows whose Age is above 30.
filtered_data <- filter(data, Age > 30)

# 2. Selecting columns ----
# Keep just the Name and Score columns.
selected_data <- select(data, Name, Score)

# 3. Summarising ----
# Collapse the data to a single row holding the mean of Score.
average_score <- summarise(data, Average_Score = mean(Score))
print("Average Score:")
print(average_score)
7
OUTPUT :
8
Experiment = 4
library(ggplot2)
# 1. Scatter plot ----
# Age on the x-axis against Score on the y-axis, one blue point per row.
scatter_plot <- ggplot(data, aes(x = Age, y = Score)) +
  geom_point(color = "blue", size = 3) +
  labs(
    title = "Scatter Plot of Age vs Score",
    x = "Age",
    y = "Score"
  ) +
  theme_minimal()
print(scatter_plot)

# 2. Bar chart ----
# One bar per Name whose height is the Score value itself; geom_col() is the
# ggplot2 shorthand for geom_bar(stat = "identity").
bar_chart <- ggplot(data, aes(x = Name, y = Score, fill = Name)) +
  geom_col() +
  labs(
    title = "Bar Chart of Scores by Name",
    x = "Name",
    y = "Score"
  ) +
  theme_minimal()
# Explicit print() so the bar chart is rendered like the other two plots.
print(bar_chart)

# 3. Histogram ----
# Distribution of Score in bins that are 5 units wide.
histogram <- ggplot(data, aes(x = Score)) +
  geom_histogram(binwidth = 5, fill = "lightblue", color = "black") +
  labs(
    title = "Histogram of Scores",
    x = "Score",
    y = "Frequency"
  ) +
  theme_minimal()
print(histogram)
9
OUTPUT :
10
Experiment = 5
library(ggplot2)
library(dplyr)
library(corrplot)
# 1. Summary statistics ----
# Per-column summary of the whole data set.
summary_statistics <- summary(data)
print("Summary Statistics:")
print(summary_statistics)

# 2. Visualizing distributions ----
# Histogram of Age in bins that are 2 units wide.
hist_age <- ggplot(data, aes(x = Age)) +
  geom_histogram(binwidth = 2, fill = "lightblue", color = "black") +
  labs(title = "Histogram of Age", x = "Age", y = "Frequency") +
  theme_minimal()
print(hist_age)

# 3. Correlation matrix ----
# cor() stops with "'x' must be numeric" as soon as one column is character
# or factor, so only numeric/logical columns are passed on. When every column
# already qualifies the result is the same as cor(data).
is_cor_column <- vapply(
  data,
  function(column) is.numeric(column) || is.logical(column),
  logical(1)
)
correlation_matrix <- cor(data[, is_cor_column, drop = FALSE])
print("Correlation Matrix:")
print(correlation_matrix)
10
OUTPUT :
11
Experiment = 6
library(dplyr)
library(ggplot2)
# Label, then the first six rows (head()'s default) of the data set.
print("Original Data:")
print(head(data, n = 6L))
# 2. Detecting Outliers
# Flag outliers with the 1.5 * IQR rule.
#
# Arguments:
#   x      Numeric vector to check.
#   na.rm  Forwarded to quantile(). With the default FALSE, missing values in
#          `x` make quantile() stop with an error; with TRUE the quartiles are
#          computed from the non-missing values and NA inputs yield NA flags.
#
# Returns a logical vector as long as `x`: TRUE where the value lies below
# Q1 - 1.5 * IQR or above Q3 + 1.5 * IQR.
detect_outliers <- function(x, na.rm = FALSE) {
  q1 <- quantile(x, 0.25, na.rm = na.rm)
  q3 <- quantile(x, 0.75, na.rm = na.rm)
  # Local name kept distinct from stats::IQR() to avoid shadowing it.
  iqr <- q3 - q1
  lower_bound <- q1 - 1.5 * iqr
  upper_bound <- q3 + 1.5 * iqr
  x < lower_bound | x > upper_bound
}
# 3. Normalizing Data
# Min-max scaling: shift `x` so its minimum becomes 0, then divide by the
# spread between its minimum and maximum so the maximum becomes 1.
normalize <- function(x) {
  lowest <- min(x)
  spread <- max(x) - lowest
  (x - lowest) / spread
}
# Label, then the first six rows (head()'s default) of the normalized data.
# NOTE(review): `data_normalized` is not created in this part of the file.
print("Normalized Data:")
print(head(data_normalized, n = 6L))
OUTPUT :
13
14
Experiment = 7
library(ggplot2)
# Predict for three new x values.
# NOTE(review): `model` is not created in this part of the file; the pasted
# output suggests lm(y ~ x) - confirm.
new_data <- data.frame(x = c(3, 5, 7))
predictions <- predict(model, newdata = new_data)

# Auto-print the predicted values.
predictions
15
OUTPUT :
A data.frame: 6 × 2
x y
<dbl> <dbl>
1 7.741917 21.22482
2 3.870604 11.94559
3 5.726257 18.35010
4 6.265725 20.85671
5 5.808537 16.04875
6 4.787751 13.21240
x y
Min. :-0.9862 Min. :-3.976
1st Qu.: 3.7666 1st Qu.:11.827
Median : 5.1796 Median :15.581
Mean : 5.0650 Mean :15.185
3rd Qu.: 6.3231 3rd Qu.:19.132
Max. : 9.5733 Max. :30.757
Call:
lm(formula = y ~ x, data = data)
Residuals:
Min 1Q Median 3Q Max
-2.5640 -0.6301 -0.1046 0.6518 2.4347
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.34771 0.26710 1.302 0.196
x 2.92930 0.04881 60.018 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
16
17
Experiment = 8
library(ggplot2)
library(caret)
library(pROC)
# ROC curve ----
# Build the ROC object from the Species column (response) and the
# predicted_prob column (predictor) of test_data, then take its AUC.
# NOTE(review): `test_data` is not created in this part of the file.
roc_obj <- roc(
  response = test_data$Species,
  predictor = test_data$predicted_prob
)
auc_value <- auc(roc_obj)
18
OUTPUT :
Reference
Prediction 0 1
0 33 0
1 0 12
Accuracy : 1
95% CI : (0.9213, 1)
No Information Rate : 0.7333
P-Value [Acc > NIR] : 8.681e-07
Kappa : 1
Sensitivity : 1.0000
Specificity : 1.0000
Pos Pred Value : 1.0000
Neg Pred Value : 1.0000
Prevalence : 0.7333
Detection Rate : 0.7333
Detection Prevalence : 0.7333
Balanced Accuracy : 1.0000
'Positive' Class : 0
19
Experiment = 9
library(ggplot2)
20
OUTPUT :
21
Experiment = 10
library(forecast)
library(ggplot2)
# Moving average ----
# 12-month moving average of the monthly AirPassengers series, drawn on top
# of the raw series. PI only applies to forecast objects, so it is not passed
# for this plain time-series layer.
ma_12 <- ma(AirPassengers, order = 12)
autoplot(AirPassengers) +
  autolayer(ma_12, series = "12-Month Moving Average") +
  labs(
    title = "Air Passengers with 12-Month Moving Average",
    x = "Year",
    y = "Number of Passengers"
  ) +
  theme_minimal()

# Exponential smoothing ----
# Let ets() pick the error/trend/seasonal form automatically.
ets_model <- ets(AirPassengers)
summary(ets_model)

# ARIMA model ----
# Let auto.arima() search for the model orders automatically.
arima_model <- auto.arima(AirPassengers)
summary(arima_model)

# Forecasts ----
# The comparison plot needs forecast objects from both models. 24 months is
# two seasonal cycles, which is also forecast()'s default horizon for a
# series with period 12.
ets_forecast <- forecast(ets_model, h = 24)
arima_forecast <- forecast(arima_model, h = 24)

# Compare forecasts ----
# Point forecasts only (PI = FALSE hides the prediction intervals) so both
# lines stay readable on one plot.
autoplot(AirPassengers) +
  autolayer(ets_forecast, series = "ETS Forecast", PI = FALSE) +
  autolayer(arima_forecast, series = "ARIMA Forecast", PI = FALSE) +
  labs(
    title = "Forecast Comparison: ETS vs ARIMA",
    x = "Year",
    y = "Number of Passengers"
  ) +
  theme_minimal()
OUTPUT :
ETS(M,Ad,M)
23
Call:
ets(y = AirPassengers)
Smoothing parameters:
alpha = 0.7096
beta = 0.0204
gamma = 1e-04
phi = 0.98
Initial states:
l = 120.9939
b = 1.7705
s = 0.8944 0.7993 0.9217 1.0592 1.2203 1.2318
1.1105 0.9786 0.9804 1.011 0.8869 0.9059
sigma: 0.0392
Series: AirPassengers
ARIMA(2,1,1)(0,1,0)[12]
Coefficients:
ar1 ar2 ma1
0.5960 0.2143 -0.9819
s.e. 0.0888 0.0880 0.0292
24
sigma^2 = 132.3: log likelihood = -504.92
AIC=1017.85 AICc=1018.17 BIC=1029.35
25