R Programming Lab Manual
# Example operands (assumed values; the original listing does not define a and b)
a <- 10
b <- 5

# Addition
sum_ab <- a + b
cat("Addition:", sum_ab, "\n")
# Subtraction
diff_ab <- a - b
cat("Subtraction:", diff_ab, "\n")
# Multiplication
prod_ab <- a * b
cat("Multiplication:", prod_ab, "\n")
# Division
div_ab <- a / b
cat("Division:", div_ab, "\n")
# Exponentiation
exp_ab <- a ^ b
cat("Exponentiation:", exp_ab, "\n")
# Square Root
sqrt_a <- sqrt(a)
cat("Square Root of a:", sqrt_a, "\n")
# Trigonometric Functions
angle <- 45 # in degrees
# Sine
sin_val <- sin(angle * (pi / 180))
cat("Sine:", sin_val, "\n")
# Cosine
cos_val <- cos(angle * (pi / 180))
cat("Cosine:", cos_val, "\n")
# Tangent
tan_val <- tan(angle * (pi / 180))
cat("Tangent:", tan_val, "\n")
# Logarithmic Functions
x <- 100
# Natural Logarithm
ln_x <- log(x)
cat("Natural Logarithm of x:", ln_x, "\n")
OUTPUT:
# Example string (assumed; the original listing does not show the definition of my_string)
my_string <- "Hello World"

# 2. Convert to uppercase
uppercase_string <- toupper(my_string)
cat("2. Uppercase: ", uppercase_string, "\n")
# 3. Convert to lowercase
lowercase_string <- tolower(my_string)
cat("3. Lowercase: ", lowercase_string, "\n")
# 4. Substring
substring <- substr(my_string, start = 1, stop = 5)
cat("4. Substring (1-5): ", substring, "\n")
# 7. Replace a substring
replaced_string <- gsub("World", "R", my_string)
cat("7. Replaced: ", replaced_string, "\n")
OUTPUT:
# Example matrices (assumed 2x2 values; the original listing does not define matrix_A and matrix_B)
matrix_A <- matrix(c(1, 2, 3, 4), nrow = 2)
matrix_B <- matrix(c(5, 6, 7, 8), nrow = 2)

# Matrix Addition
matrix_sum <- matrix_A + matrix_B
print("Matrix Addition Result:")
print(matrix_sum)
# Matrix Subtraction
matrix_diff <- matrix_A - matrix_B
print("Matrix Subtraction Result:")
print(matrix_diff)
# Element-wise Multiplication (use %*% for true matrix multiplication)
matrix_product <- matrix_A * matrix_B
print("Element-wise Multiplication Result:")
print(matrix_product)
# Transpose of Matrix A
matrix_A_transpose <- t(matrix_A)
print("Transpose of Matrix A:")
print(matrix_A_transpose)
# Determinant of Matrix A (defined only for square matrices)
if (nrow(matrix_A) == ncol(matrix_A)) {
  determinant_A <- det(matrix_A)
  print(paste("Determinant of Matrix A:", determinant_A))
} else {
  print("Matrix A is not square and determinant cannot be computed.")
}
# Find the product of the transpose of matrix "tmp" and the original matrix "tmp"
# (tmp is assumed here; the original listing does not show its creation)
tmp <- matrix(1:6, nrow = 2)
tmp_transpose <- t(tmp)
tmp_product <- tmp_transpose %*% tmp
print("Product of Transpose of Matrix tmp and Matrix tmp:")
print(tmp_product)
# Build a matrix whose entries are the absolute difference of column and row numbers
# (the 4x4 size of matE is assumed; the original listing does not show it)
matE <- matrix(0, nrow = 4, ncol = 4)
matE <- abs(col(matE) - row(matE))
print("Matrix with values of column and row numbers:")
print(matE)
OUTPUT:
# Example 3x3 grid (assumed; the original listing does not show how sudoku was created)
sudoku <- matrix(0, nrow = 3, ncol = 3)
sudoku[1, 1] <- 3
sudoku[2, 2] <- 2
sudoku[3, 3] <- 1
print(sudoku)
OUTPUT:
OUTPUT:
# (The definitions of attempts, qualify and df are not shown in the original listing)
print(attempts)
print(qualify)
print(df)
OUTPUT:
# Given dataframe
exam_data <- data.frame(
  name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura',
           'Kevin', 'Jonas'),
  score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
  attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
  qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes')
)
print("Original dataframe:")
print(exam_data)
print("Structure of the said data frame:")
print(str(exam_data))
OUTPUT:
# Given dataframe
> exam_data <- data.frame(
+ name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'),
+ score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
+ attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
+ qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes')
+ )
>
> # Print original dataframe and its structure
> print("Original dataframe:")
[1] "Original dataframe:"
> print(exam_data)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes
> print("Structure of the said data frame:")
[1] "Structure of the said data frame:"
> print(str(exam_data))
'data.frame': 10 obs. of 4 variables:
$ name : chr "Anastasia" "Dima" "Katherine" "James" ...
$ score : num 12.5 9 16.5 12 9 20 14.5 13.5 8 19
$ attempts: num 1 3 2 3 2 3 1 1 2 1
$ qualify : chr "yes" "no" "yes" "no" ...
NULL
# Given dataframe
exam_data <- data.frame(
name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura',
'Kevin', 'Jonas'),
score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes')
)
print("Original dataframe:")
print(exam_data)

# Reorder dataframe by column name
exam_data <- exam_data[c("name", "attempts", "score", "qualify")]
print("Reorder by column name:")
print(exam_data)
OUTPUT:
# Given dataframe
> exam_data <- data.frame(
+ name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'),
+ score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
+ attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
+ qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes')
+ )
>
> # Print original dataframe
> print("Original dataframe:")
[1] "Original dataframe:"
> print(exam_data)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes
>
> # Reorder dataframe by column name
> exam_data <- exam_data[c("name", "attempts", "score", "qualify")]
> print("Reorder by column name:")
[1] "Reorder by column name:"
> print(exam_data)
name attempts score qualify
1 Anastasia 1 12.5 yes
2 Dima 3 9.0 no
3 Katherine 2 16.5 yes
4 James 3 12.0 no
5 Emily 2 9.0 no
6 Michael 3 20.0 yes
7 Matthew 1 14.5 yes
8 Laura 1 13.5 no
9 Kevin 2 8.0 no
10 Jonas 1 19.0 yes
Part 5.E: Find elements present in two dataframes
# Given dataframes
a <- c("a", "b", "c", "d", "e")
b <- c("d", "e", "f", "g")

print("Original Dataframes:")
print(a)
print(b)

# Find elements present in both dataframes
result <- intersect(a, b)
print("Elements which are present in both dataframes:")
print(result)
OUTPUT:
# Given dataframes
> a <- c("a", "b", "c", "d", "e")
> b <- c("d", "e", "f", "g")
>
> print("Original Dataframes:")
[1] "Original Dataframes:"
> print(a)
[1] "a" "b" "c" "d" "e"
> print(b)
[1] "d" "e" "f" "g"
>
> # Find elements present in both dataframes
> result <- intersect(a, b)
> print("Elements which are present in both dataframes:")
[1] "Elements which are present in both dataframes:"
> print(result)
[1] "d" "e"
# Order the entire data frame by the first and second column
# (the data frame "data" is assumed to exist; its creation is not shown in the original listing)
result <- data[order(data[, 1], data[, 2]), ]
print("Order the entire data frame by the first and second column:")
print(result)
OUTPUT:
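The code that produced the coin-toss counts in the transcript below is not shown in the listing; a minimal sketch of how the tallied result could have been generated (the figure of 1000 double tosses is an assumption based on the counts summing to 1000):
# Simulate 1000 tosses of a pair of fair coins and record each outcome pair as "HH", "HT", "TH" or "TT"
result <- replicate(1000, paste(sample(c("H", "T"), 2, replace = TRUE), collapse = ""))
flips <- table(result)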
OUTPUT:
> flips <- table(result)
> flips
result
HH HT TH TT
241 259 256 244
> result1 <- as.data.frame(flips)
> result1
result Freq
1 HH 241
2 HT 259
3 TH 256
4 TT 244
> # Plot Line graph
> plot(result1$result, result1$Freq,
+ type = 'l', # line type
+ lwd = 2, # width of line
+ col = "red",# color of line
+ las = 1, # orientation of tick-mark labels
+ ylim = c(50, 1000), # range of y-axis
+ xlab = "number of tosses", # x-axis label
+ ylab = "relative frequency") # y-axis label
> abline(h = 0.5, col = 'gray50')
>
> # Plot Bar Graph
> barplot(result1$Freq, names.arg = result1$result, col = "red")
>
7. Find the 25th quantile of a binomial distribution with 25 trials and probability of success on each trial = 0.5
# 25th quantile of Binomial(25, 0.5)
quantile_25 <- qbinom(0.25, size = 25, prob = 0.5)
print(quantile_25)
# Upper-tail counterpart (assumed; the original listing does not show how quantile_complement was computed)
quantile_complement <- qbinom(0.25, size = 25, prob = 0.5, lower.tail = FALSE)
print(quantile_complement)
# Given vector of values
random_values <- c(8, 14, 10, 12, 10, 14, 16, 7, 13, 12)
print(random_values)
OUTPUT :
8.A. Find the probability of 10 successes during 12 trials where the probability of success on each trial is 0.6
OUTPUT :
> #find the probability of 10 successes during 12 trials where the probability of
> #success on each trial is 0.6
> dbinom(x=10, size=12, prob=.6)
[1] 0.06385228
8.B. Subha flips a fair coin 20 times. What is the probability that the
coin lands on heads exactly 7 times?
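The code for this part is not shown in the listing (the transcript below carries a different dbinom example); a minimal sketch for the stated question:
# P(exactly 7 heads in 20 flips of a fair coin)
dbinom(7, size = 20, prob = 0.5)   # roughly 0.074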
OUTPUT :
> dbinom(3, size=5, prob=0.65)
[1] 0.3364156
OUTPUT:
> pbinom(4, size=5, prob=.2)
[1] 0.99968
8. E. Suppose you have a large population of students that’s 50%
female. If students are assigned to classrooms at random, and you
visit 100 classrooms with 20 students each, then how many girls
might you expect to see in each classroom?
rbinom(100,20,0.5)
OUTPUT:
> rbinom(100,20,0.5)
[1] 14 14 7 10 9 8 10 12 11 11 12 9 8 14 9 9 7 10 10 8 8 8 7
[24] 11 13 10 10 7 11 10 8 10 11 4 10 7 11 10 12 7 7 9 9 10 13 13
[47] 8 8 12 6 11 11 10 7 6 13 9 9 10 10 13 11 8 10 9 10 9 6 13
[70] 12 7 6 14 9 12 8 9 12 12 8 6 8 10 12 10 10 13 9 10 12 5 4
[93] 8 10 13 14 8 10 8 11
9. Write a function in R that randomly draws five cards from a deck of cards
suit <- c("clubs", "diamonds", "hearts", "spades")
face <- c("king", "queen", "jack", "ten", "nine", "eight", "seven", "six", "five", "four", "three", "two", "ace")
num <- c(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
deck <- data.frame(face = rep(face, 4),
                   suit = c(rep("spades", 13), rep("clubs", 13), rep("diamonds", 13), rep("hearts", 13)),
                   num = rep(num, 4))
draw_n_random_cards <- function(deck, n) {
  deck[sample(nrow(deck), n), ]
}
result <- draw_n_random_cards(deck, 5)
print(result)
OUTPUT :
> suit <- c("clubs", "diamonds", "hearts", "spades")
> face <- c("king", "queen", "jack", "ten", "nine", "eight", "seven", "six
", "five", "four", "three", "two", "ace")
> num <- c(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
> deck <- data.frame(
+ face = rep(face, 4),
+ suit = c(rep("spades", 13), rep("clubs", 13), rep("diamonds", 13), rep
("hearts", 13)),
+ num = rep(num, 4)
+ )
>
> draw_n_random_cards <- function(deck, n) {
+ deck[sample(nrow(deck), n), ]
+ }
>
> result <- draw_n_random_cards(deck, 5)
> print(result)
face suit num
28 queen diamonds 12
51 two hearts 2
34 six diamonds 6
7 seven spades 7
4 ten spades 10
10.A. Find the value of the standard normal distribution pdf at x=0
dnorm(x=0, mean=0, sd=1)
# [1] 0.3989423
# by default, R uses mean=0 and sd=1
dnorm(x=0)
# [1] 0.3989423
#find the value of the normal distribution pdf at x=10 with mean=20 and sd=5
dnorm(x=10, mean=20, sd=5)
OUTPUT:
> dnorm(x=0, mean=0, sd=1)
[1] 0.3989423
> # [1] 0.3989423
> # by default, R uses mean=0 and sd=1
> dnorm(x=0)
[1] 0.3989423
> # [1] 0.3989423
> #find the value of the normal distribution pdf at x=10 with mean=20 and sd=5
> dnorm(x=10, mean=20, sd=5)
[1] 0.01079819
10.B. Suppose the height of males at a certain school is normally distributed with a mean of μ=70 inches and a standard deviation of σ = 2 inches. Approximately what percentage of males at this school are taller than 74 inches?
#find the percentage of males that are taller than 74 inches in a population with
#mean = 70 and sd = 2
pnorm(74, mean=70,
sd=2, lower.tail=FALSE)
OUTPUT:
> #find the percentage of males that are taller than 74 inches in a population with
> #mean = 70 and sd = 2
> pnorm(74, mean=70,
+ sd=2, lower.tail=FALSE)
[1] 0.02275013
10.C. Suppose the weight of a certain species of otters is normally distributed with a mean of μ=30 lbs and a standard deviation of σ = 5 lbs. Approximately what percentage of this species of otters weigh less than 22 lbs?
#find percentage of otters that weigh less than 22 lbs in a population with
#mean = 30 and sd = 5
pnorm(22, mean=30, sd=5)
OUTPUT:
> #find percentage of otters that weigh less than 22 lbs in a population with
> #mean = 30 and sd = 5
> pnorm(22, mean=30, sd=5)
[1] 0.05479929
10.D. Suppose the height of plants in a certain region is normally distributed with a mean of μ=13 inches and a standard deviation of σ = 2 inches. Approximately what percentage of plants in this region are between 10 and 14 inches tall?
#find percentage of plants that are less than 14 inches tall, then subtract the
#percentage of plants that are less than 10 inches tall, based on a population
#with mean = 13 and sd = 2
pnorm(14, mean=13, sd=2) - pnorm(10, mean=13, sd=2)
OUTPUT :
> #find percentage of plants that are less than 14 inches tall, then subtract the
> #percentage of plants that are less than 10 inches tall, based on a population
> #with mean = 13 and sd = 2
> pnorm(14, mean=13, sd=2) - pnorm(10, mean=13, sd=2)
[1] 0.6246553
10.E. Suppose that you have a machine that packages rice inside boxes. The process follows a Normal distribution and it is known that the mean of the weight of each box is 1000 grams and the standard deviation is 10 grams. What is the probability of a box weighing exactly 950 grams?
dnorm(950, 1000, 10)
#What is the probability of a box weighing more than 980 grams?
pnorm(980,1000,10,lower.tail=FALSE)
#Calculate the quantile for probability 0.5 for the above scenario.
qnorm(0.5,1000,10)
#Simulate the above scenario for 10 observations.
rnorm(10,1000,10)
OUTPUT:
> dnorm( 950, 1000,10)
[1] 1.48672e-07
> #What is the probability of a box weighing more than 980 grams?
> pnorm(980,1000,10,lower.tail=FALSE)
[1] 0.9772499
> #Calculate the quantile for probability 0.5 for the above scenario.
> qnorm(0.5,1000,10)
[1] 1000
> #Simulate the above scenario for 10 observations.
> rnorm(10,1000,10)
[1] 1011.0971 998.3551 1006.1722 997.9489 1020.7643 1006.1655 1002.1544
[8] 1000.9453 998.5481 991.4324
Step 1: Open MS Excel and create an experience and salary data sheet as described below.
Step 2: Click File > Save As, give the file name salary_data.csv, and change "Save as type" to CSV (comma delimited), as shown in the figure.
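The data sheet and figure themselves are not reproduced in this manual; based on the code and output that follow, salary_data.csv contains an Experience column and a Salary column, with rows such as:
Experience,Salary
8,57
11,59
(remaining rows as entered in the sheet)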
# Define the path to your CSV file (replace with the actual location)
csv_file_path <- "C:/Users/MOIN PASHA/OneDrive/Desktop/salary_data.csv"
# Read the salary and experience data from the CSV file
data <- read.csv(csv_file_path)
# Function to predict salary based on experience (see the full transcript below)
predict_salary <- function(experience) {
  model <- lm(Salary ~ Experience, data = data)
  predict(model, newdata = data.frame(Experience = experience))
}
# Call the function to create the model and get predicted salary for 11 years
predicted_salary_11 <- predict_salary(11)
OUTPUT:
# Define the path to your CSV file (replace with the actual location)
> csv_file_path <- "C:/Users/MOIN PASHA/OneDrive/Desktop/salary_data.csv"
> # Read the salary and experience data from the CSV file
> data <- read.csv(csv_file_path)
>
> # Function to predict salary based on experience
> predict_salary <- function(experience) {
+ # Create the linear regression model inside the function
+ model <- lm(Salary ~ Experience, data = data) # Ensure "Salary" matches your column name
+
+ # Predict salary for the given experience
+ predicted_salary <- predict(model, newdata = data.frame(Experience = experience))
+ return(predicted_salary)
+ }
>
> # Call the function to create the model and get predicted salary for 11 years
> predicted_salary_11 <- predict_salary(11)
>
> # Test the model with experience of 11 years
> actual_salary_11 <- data[data$Experience == 11, "Salary"]
> prediction_error_11 <- abs(actual_salary_11 - predicted_salary_11)
>
> # Test the model with experience of 8 years (similar process)
> predicted_salary_8 <- predict_salary(8)
> actual_salary_8 <- data[data$Experience == 8, "Salary"]
> prediction_error_8 <- abs(actual_salary_8 - predicted_salary_8)
>
> # Print the results for 11 years
> cat("Predicted Salary for 11 years experience:", predicted_salary_11, "\n")
Predicted Salary for 11 years experience: 62.1212
> cat("Actual Salary for 11 years experience:", actual_salary_11, "\n") # Assuming "Salary" is the column name for actual salary
Actual Salary for 11 years experience: 59
> cat("Prediction Error for 11 years experience:", prediction_error_11, "\n\n")
Prediction Error for 11 years experience: 3.121204
>
> # Print the results for 8 years
> cat("Predicted Salary for 8 years experience:", predicted_salary_8, "\n")
Predicted Salary for 8 years experience: 51.50878
> cat("Actual Salary for 8 years experience:", actual_salary_8, "\n")
Actual Salary for 8 years experience: 57
> cat("Prediction Error for 8 years experience:", prediction_error_8, "\n\n")
Prediction Error for 8 years experience: 5.491223
>
> # Print additional messages about successful file reading or errors (optional)
> if (!is.null(data)) {
+ cat("Salary and experience data loaded successfully from the CSV file.\n")
+ } else {
+ cat("Error: Could not read data from the CSV file. Please check the file path.\n")
+ }
Salary and experience data loaded successfully from the CSV file.
input <- mtcars[, c("mpg", "disp", "hp", "wt")]   # regression inputs (full code in the transcript below)
model <- lm(mpg ~ disp + hp + wt, data = input)
Xdisp <- coef(model)[2]; Xhp <- coef(model)[3]; Xwt <- coef(model)[4]
print(Xdisp)
print(Xhp)
print(Xwt)
OUTPUT:
> # Load the necessary dataset
> data(mtcars)
>
> # Extract the relevant columns for analysis
> input <- mtcars[, c("mpg", "disp", "hp", "wt")]
> print(head(input))
mpg disp hp wt
Mazda RX4 21.0 160 110 2.620
Mazda RX4 Wag 21.0 160 110 2.875
Datsun 710 22.8 108 93 2.320
Hornet 4 Drive 21.4 258 110 3.215
Hornet Sportabout 18.7 360 175 3.440
Valiant 18.1 225 105 3.460
>
> # Create the relationship model
> model <- lm(mpg ~ disp + hp + wt, data = input)
>
> # Show the model
> print(model)
Call:
lm(formula = mpg ~ disp + hp + wt, data = input)
Coefficients:
(Intercept) disp hp wt
37.105505 -0.000937 -0.031157 -3.800891
>
> # Get the Intercept and coefficients as vector elements
> cat("# # # # The Coefficient Values # # # ", "\n")
# # # # The Coefficient Values # # #
> a <- coef(model)[1]
> print(a)
(Intercept)
37.10551
> Xdisp <- coef(model)[2]
> Xhp <- coef(model)[3]
> Xwt <- coef(model)[4]
>
> print(Xdisp)
disp
-0.0009370091
> print(Xhp)
hp
-0.03115655
> print(Xwt)
wt
-3.800891
>
> # Note: Statistical Inference
> # Based on the above intercept and coefficient values, we create the mathematical equation.
> # Y = a + Xdisp*x1 + Xhp*x2 + Xwt*x3
> # or
> # Y = 37.10551 + (-0.000937)*x1 + (-0.03115655)*x2 + (-3.800891)*x3
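As a quick check, the fitted model from the transcript can also be used for prediction directly; a minimal sketch (the disp, hp and wt values below are illustrative, not from the manual):
# Predict mpg for a hypothetical car using the fitted model
new_car <- data.frame(disp = 200, hp = 120, wt = 3.0)
predict(model, newdata = new_car)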
13. Demonstrate Sampling and Sampling Distribution using Iris Data set
OUTPUT:
> # Load the necessary dataset and view its structure
> str(iris)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
>
> # Convert the iris dataset to a data frame
> iris_df <- data.frame(iris)
> View(iris_df)
>
> # Set the number of iterations and sample size
> iter <- 100
> n <- 5
>
> # Initialize a vector to store the means
> means <- rep(NA, iter)
>
> # Perform the iterations
> for (i in 1:iter) {
+ mean_of_each_sample <- sample(iris$Petal.Length, n)
+ means[i] <- mean(mean_of_each_sample)
+ }
>
> # Create a histogram of the means
> hist(means)
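A short follow-up (not part of the original transcript) that illustrates the point of the exercise, comparing the centre of the sampling distribution with the population mean:
# The mean of the sample means should be close to the population mean of Petal.Length
cat("Mean of sample means:", mean(means), "\n")
cat("Population mean of Petal.Length:", mean(iris$Petal.Length), "\n")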
# Load datasets (requires the xlsx package)
library(xlsx)
bsc_a <- read.xlsx("C:/Users/MOIN PASHA/OneDrive/Documents/R programming/BSc 1.xlsx", sheetIndex = 1)
bsc_b <- read.xlsx("C:/Users/MOIN PASHA/OneDrive/Documents/R programming/BSc 2.xlsx", sheetIndex = 1)

# Summary statistics for a single column
summary_stats <- function(column) {
  mean_val <- mean(column, na.rm = TRUE)
  median_val <- median(column, na.rm = TRUE)
  sd_val <- sd(column, na.rm = TRUE)
  mode_val <- get_mode(column)   # get_mode helper: see the sketch after this listing
  list(mean = mean_val, median = median_val, sd = sd_val, mode = mode_val)
}

# Print the statistics for each relevant column
# (relevant_columns and stats_results are assumed to be built earlier; that code is not shown in the original listing)
for (i in seq_along(relevant_columns)) {
  print(paste("Summary statistics for", relevant_columns[i], ":"))
  print(paste("Mean:", stats_results[[i]]$mean))
  print(paste("Median:", stats_results[[i]]$median))
  print(paste("Standard deviation:", stats_results[[i]]$sd))
  print(paste("Mode:", stats_results[[i]]$mode))
}

# Multilinear Regression
input_b <- bsc_b[, c(relevant_columns, "Avg")]
model_b <- lm(Avg ~ ., data = input_b)
print(coef(model_b))
# (predicted_avg_moin and predictive_error_moin are computed from model_b in code not shown here)
print(predicted_avg_moin)
print(predictive_error_moin)
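The summary_stats() function above calls a get_mode() helper that does not appear in the listing; a minimal sketch of such a helper (an assumption, since base R has no built-in mode function):
# Most frequent value in a vector (ignoring NAs)
get_mode <- function(x) {
  ux <- unique(x[!is.na(x)])
  ux[which.max(tabulate(match(x, ux)))]
}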
Step 1: Check that the BSc A and BSc B section Excel sheets are present on your system.
# Parameters
alpha <- 0.05 # Significance level
OUTPUT:
> # Sample data for two groups
> group1_data <- c(22, 25, 28, 30, 32)
> group2_data <- c(18, 20, 24, 26, 28)
>
> # Parameters
> alpha <- 0.05 # Significance level
>
> # Two-sample Z-test function
> two_sample_z_test <- function(data1, data2) {
+ mean1 <- mean(data1)
+ mean2 <- mean(data2)
+ sd1 <- sd(data1)
+ sd2 <- sd(data2)
+ n1 <- length(data1)
+ n2 <- length(data2)
+ z_score <- (mean1 - mean2) / sqrt((sd1^2 / n1) + (sd2^2 / n2))
+ p_value <- 2 * pnorm(-abs(z_score)) # Two-tailed test
+ result <- list(
+ z_score = z_score,
+ p_value = p_value
+ )
+ return(result)
+ }
>
> # Perform the two-sample Z-test
> result_two_sample <- two_sample_z_test(group1_data, group2_data)
>
> # Display the result
> cat("Z-Score:", result_two_sample$z_score, "\n")
Z-Score: 1.634848
> cat("P-Value:", result_two_sample$p_value, "\n")
P-Value: 0.102081
>
> # Make a decision based on the p-value
> if (result_two_sample$p_value < alpha) {
+ cat("Reject the null hypothesis. There is a significant difference between the two groups.\n")
+ } else {
+ cat("Fail to reject the null hypothesis. There is no significant difference between the two groups.\n")
+ }
Fail to reject the null hypothesis. There is no significant difference between the two groups.