0% found this document useful (0 votes)
2 views

R program Lab manual

The document contains a lab manual for a Statistical Computing and R Programming Language course at Nrupathunga University, detailing various R programming tasks. It includes programs demonstrating mathematical functions, string functions, and matrix operations, complete with example code and expected outputs. Each section provides practical exercises to illustrate the use of R for statistical computing.

Uploaded by

mohanreddy5772
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

R program Lab manual

The document contains a lab manual for a Statistical Computing and R Programming Language course at Nrupathunga University, detailing various R programming tasks. It includes programs demonstrating mathematical functions, string functions, and matrix operations, complete with example code and expected outputs. Each section provides practical exercises to illustrate the use of R for statistical computing.

Uploaded by

mohanreddy5772
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 46

Nrupathunga University

Department of Computer Science


VI Sem BSc (NEP) 2023-24
Statistical Computing and R Programming Language Lab
Manual
Part-A
1. Program to demonstrate Mathematical functions in R

# Demonstration of R's basic arithmetic, trigonometric, logarithmic and
# random-number functions. Every result is stored in a variable and then
# echoed with a label.

# Small helper: write a label followed by its value(s) and a newline.
report <- function(label, value) {
  cat(label, value, "\n")
}

# Operands used by the arithmetic examples
a <- 10
b <- 5

# --- Arithmetic ----
sum_ab <- a + b
report("Addition:", sum_ab)

diff_ab <- a - b
report("Subtraction:", diff_ab)

prod_ab <- a * b
report("Multiplication:", prod_ab)

div_ab <- a / b
report("Division:", div_ab)

exp_ab <- a ^ b
report("Exponentiation:", exp_ab)

sqrt_a <- sqrt(a)
report("Square Root of a:", sqrt_a)

# --- Trigonometry ----
# R's trigonometric functions expect radians, so the angle (given in
# degrees) is converted once and reused.
angle <- 45
angle_rad <- angle * (pi / 180)

sin_val <- sin(angle_rad)
report("Sine:", sin_val)

cos_val <- cos(angle_rad)
report("Cosine:", cos_val)

tan_val <- tan(angle_rad)
report("Tangent:", tan_val)

# --- Logarithms ----
x <- 100

ln_x <- log(x) # natural logarithm (base e)
report("Natural Logarithm of x:", ln_x)

log_x <- log10(x) # common logarithm (base 10)
report("Common Logarithm (Base 10) of x:", log_x)

# --- Random numbers ----
# Ten draws from the uniform distribution on [0, 1]
random_num <- runif(10, 0, 1)
report("Random Number:", random_num)

OUTPUT:

# Define variables a and b


> a <- 10
> b <- 5
>
> # Addition
> sum_ab <- a + b
> cat("Addition:", sum_ab, "\n")
Addition: 15
>
> # Subtraction
> diff_ab <- a - b
> cat("Subtraction:", diff_ab, "\n")
Subtraction: 5
>
> # Multiplication
> prod_ab <- a * b
> cat("Multiplication:", prod_ab, "\n")
Multiplication: 50
>
> # Division
> div_ab <- a / b
> cat("Division:", div_ab, "\n")
Division: 2
>
> # Exponentiation
> exp_ab <- a ^ b
> cat("Exponentiation:", exp_ab, "\n")
Exponentiation: 1e+05
>
> # Square Root
> sqrt_a <- sqrt(a)
> cat("Square Root of a:", sqrt_a, "\n")
Square Root of a: 3.162278
>
> # Trigonometric Functions
> angle <- 45 # in degrees
>
> # Sine
> sin_val <- sin(angle * (pi / 180))
> cat("Sine:", sin_val, "\n")
Sine: 0.7071068
>
> # Cosine
> cos_val <- cos(angle * (pi / 180))
> cat("Cosine:", cos_val, "\n")
Cosine: 0.7071068
>
> # Tangent
> tan_val <- tan(angle * (pi / 180))
> cat("Tangent:", tan_val, "\n")
Tangent: 1
>
> # Logarithmic Functions
> x <- 100
>
> # Natural Logarithm
> ln_x <- log(x)
> cat("Natural Logarithm of x:", ln_x, "\n")
Natural Logarithm of x: 4.60517
>
> # Common Logarithm (Base 10)
> log_x <- log10(x)
> cat("Common Logarithm (Base 10) of x:", log_x, "\n")
Common Logarithm (Base 10) of x: 2
>
> # Random Number Generation
> random_num <- runif(10, 0, 1) # Generate 10 random numbers b
etween 0 and 1
> cat("Random Number:", random_num, "\n")
Random Number: 0.5486608 0.3227999 0.4051977 0.2274602 0.76546
66 0.3616712 0.07021952 0.9024233 0.2425885 0.901664

2. Program to demonstrate String functions in R


# Demonstration of common base-R string functions on a sample string.

# Create a sample string
my_string <- "Hello, World!"

# 1. Length of the string (number of characters)
string_length <- nchar(my_string)
cat("1. Length of the string: ", string_length, "\n")

# 2. Convert to uppercase
uppercase_string <- toupper(my_string)
cat("2. Uppercase: ", uppercase_string, "\n")

# 3. Convert to lowercase
lowercase_string <- tolower(my_string)
cat("3. Lowercase: ", lowercase_string, "\n")

# 4. Substring: characters 1 to 5
substring <- substr(my_string, start = 1, stop = 5)
cat("4. Substring (1-5): ", substring, "\n")

# 5. Split the string into words
# strsplit() returns a list with one element per input string; [[1]] takes
# the character vector of words for our single sentence.
my_string1 <- "Hello welcome to the World of R!"
words <- strsplit(my_string1, split = " ")[[1]]
# Loop over however many words there are instead of indexing a fixed
# words[[1]] .. words[[7]], which fails for shorter sentences.
for (word in words) {
  cat("5. Split into words: ", word, "\n")
}

# 6. Concatenate strings
# string1 already ends with a space, so join without a separator:
# paste() would insert a second space ("Hello,  World!").
string1 <- "Hello, "
string2 <- "World!"
concatenated_string <- paste0(string1, string2)
cat("6. Concatenated: ", concatenated_string, "\n")

# 7. Replace a substring (fixed = TRUE: match literally, not as a regex)
replaced_string <- gsub("World", "R", my_string, fixed = TRUE)
cat("7. Replaced: ", replaced_string, "\n")

# 8. Check if a string contains a substring
contains_substring <- grepl("Hello", my_string, fixed = TRUE)
cat("8. Contains 'Hello': ", contains_substring, "\n")

OUTPUT:

# Create a sample string


> my_string <- "Hello, World!"
>
> # 1. Length of the string
> string_length <- nchar(my_string)
> cat("1. Length of the string: ", string_length, "\n")
1. Length of the string: 13
>
> # 2. Convert to uppercase
> uppercase_string <- toupper(my_string)
> cat("2. Uppercase: ", uppercase_string, "\n")
2. Uppercase: HELLO, WORLD!
>
> # 3. Convert to lowercase
> lowercase_string <- tolower(my_string)
> cat("3. Lowercase: ", lowercase_string, "\n")
3. Lowercase: hello, world!
>
> # 4. Substring
> substring <- substr(my_string, start = 1, stop = 5)
> cat("4. Substring (1-5): ", substring, "\n")
4. Substring (1-5): Hello
>
> # 5. Split the string into words
> my_string1 <- "Hello welcome to the World of R!"
> words <- strsplit(my_string1, split = " ")[[1]]
> words1 <- words[[1]]
> words2 <- words[[2]]
> words3 <- words[[3]]
> words4 <- words[[4]]
> words5 <- words[[5]]
> words6 <- words[[6]]
> words7 <- words[[7]]
> cat("5. Split into words: ", words1, "\n")
5. Split into words: Hello
> cat("5. Split into words: ", words2, "\n")
5. Split into words: welcome
> cat("5. Split into words: ", words3, "\n")
5. Split into words: to
> cat("5. Split into words: ", words4, "\n")
5. Split into words: the
> cat("5. Split into words: ", words5, "\n")
5. Split into words: World
> cat("5. Split into words: ", words6, "\n")
5. Split into words: of
> cat("5. Split into words: ", words7, "\n")
5. Split into words: R!
>
> # 6. Concatenate strings
> string1 <- "Hello, "
> string2 <- "World!"
> concatenated_string <- paste(string1, string2)
> cat("6. Concatenated: ", concatenated_string, "\n")
6. Concatenated: Hello, World!
>
> # 7. Replace a substring
> replaced_string <- gsub("World", "R", my_string)
> cat("7. Replaced: ", replaced_string, "\n")
7. Replaced: Hello, R!
>
> # 8. Check if a string contains a substring
> contains_substring <- grepl("Hello", my_string)
> cat("8. Contains 'Hello': ", contains_substring, "\n")
8. Contains 'Hello': TRUE

3. Program to demonstrate Matrix Problems as described below

# Demonstration of matrix creation, arithmetic, transpose, inverse /
# determinant / eigen decomposition (square matrices only), concatenation
# and pattern matrices built with row() and col().

# Create two example 2 x 3 matrices (matrix() fills column by column)
matrix_A <- matrix(1:6, nrow = 2)
matrix_B <- matrix(7:12, nrow = 2)

# Print the matrices
print("Matrix A:")
print(matrix_A)
print("Matrix B:")
print(matrix_B)

# Matrix Addition (element by element)
matrix_sum <- matrix_A + matrix_B
print("Matrix Addition Result:")
print(matrix_sum)

# Matrix Subtraction (element by element)
matrix_diff <- matrix_A - matrix_B
print("Matrix Subtraction Result:")
print(matrix_diff)

# Matrix Multiplication
# `*` multiplies element by element. The algebraic matrix product is `%*%`,
# which is not defined for two 2 x 3 matrices.
matrix_product <- matrix_A * matrix_B
print("Matrix Multiplication Result:")
print(matrix_product)

# Transpose of Matrix A
matrix_A_transpose <- t(matrix_A)
print("Transpose of Matrix A:")
print(matrix_A_transpose)

# Inverse and determinant of Matrix A (only defined for square matrices)
is_square_A <- nrow(matrix_A) == ncol(matrix_A)
if (is_square_A) {
  # Compute the determinant once and reuse it for both the singularity
  # check and the report below.
  determinant_A <- det(matrix_A)
  if (determinant_A != 0) {
    inverse_matrix_A <- solve(matrix_A)
    print("Inverse of Matrix A:")
    print(inverse_matrix_A)
  } else {
    print("Matrix A is singular and cannot be inverted.")
  }

  # Determinant of Matrix A
  print(paste("Determinant of Matrix A:", determinant_A))
} else {
  print("Matrix A is not square and determinant cannot be computed.")
}

# Eigenvalues and Eigenvectors of Matrix A (if it's square)
if (is_square_A) {
  eigen_A <- eigen(matrix_A)
  eigenvalues_A <- eigen_A$values
  eigenvectors_A <- eigen_A$vectors
  print("Eigenvalues of Matrix A:")
  print(eigenvalues_A)
  print("Eigenvectors of Matrix A:")
  print(eigenvectors_A)
} else {
  print("Matrix A is not square and does not have eigenvalues or eigenvectors.")
}

# Matrix Concatenation (Horizontally): columns of B appended after those of A
matrix_concatenated <- cbind(matrix_A, matrix_B)
print("Matrix Concatenation (Horizontally):")
print(matrix_concatenated)

# Creating a new 15 x 3 matrix "tmp"; the three values are recycled to fill
# all 45 cells. Argument names are spelled out (nrow/ncol) rather than
# relying on partial matching of nr/nc.
tmp <- matrix(c(10, -10, 10), nrow = 15, ncol = 3)
# Print the matrix "tmp"
print("Matrix tmp:")
print(tmp)

# Find the transpose of the matrix "tmp"
tmp_transpose <- t(tmp)
print("Transpose of Matrix tmp:")
print(tmp_transpose)

# Find the product of Transpose of matrix "tmp" and original matrix "tmp"
# (3 x 15) %*% (15 x 3) gives a 3 x 3 matrix.
tmp_product <- tmp_transpose %*% tmp
print("Product of Transpose of Matrix tmp and Matrix tmp:")
print(tmp_product)

# Print the 6 x 6 matrix with 0 entries
matE <- matrix(0, nrow = 6, ncol = 6)
print("Matrix with 0 entries:")
print(matE)

# Replace each entry by the absolute difference of its column and row index
matE <- abs(col(matE) - row(matE))
print("Matrix with values of column and row numbers:")
print(matE)

# Set the entries directly above and below the main diagonal to 1.
# NOTE: on the matrix built above these entries already equal 1, so this
# assignment leaves matE unchanged; it only has a visible effect when
# applied to the all-zero matrix.
matE[abs(col(matE) - row(matE)) == 1] <- 1
print("Matrix with initialized values:")
print(matE)

OUTPUT:

# Create two example matrices


> matrix_A <- matrix(1:6, nrow = 2)
> matrix_B <- matrix(7:12, nrow = 2)
>
> # Print the matrices
> print("Matrix A:")
[1] "Matrix A:"
> print(matrix_A)
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> print("Matrix B:")
[1] "Matrix B:"
> print(matrix_B)
[,1] [,2] [,3]
[1,] 7 9 11
[2,] 8 10 12
>
> # Matrix Addition
> matrix_sum <- matrix_A + matrix_B
> print("Matrix Addition Result:")
[1] "Matrix Addition Result:"
> print(matrix_sum)
[,1] [,2] [,3]
[1,] 8 12 16
[2,] 10 14 18
>
> # Matrix Subtraction
> matrix_diff <- matrix_A - matrix_B
> print("Matrix Subtraction Result:")
[1] "Matrix Subtraction Result:"
> print(matrix_diff)
[,1] [,2] [,3]
[1,] -6 -6 -6
[2,] -6 -6 -6
>
> # Matrix Multiplication
> matrix_product <- matrix_A * matrix_B
> print("Matrix Multiplication Result:")
[1] "Matrix Multiplication Result:"
> print(matrix_product)
[,1] [,2] [,3]
[1,] 7 27 55
[2,] 16 40 72
>
> # Transpose of Matrix A
> matrix_A_transpose <- t(matrix_A)
> print("Transpose of Matrix A:")
[1] "Transpose of Matrix A:"
> print(matrix_A_transpose)
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
>
> # Inverse of Matrix A (if possible)
> if (ncol(matrix_A) == nrow(matrix_A)) { # Ensure it's square before computing the determinant
+ if (det(matrix_A) != 0) {
+ inverse_matrix_A <- solve(matrix_A)
+ print("Inverse of Matrix A:")
+ print(inverse_matrix_A)
+ } else {
+ print("Matrix A is singular and cannot be inverted.")
+ }
+
+ # Determinant of Matrix A
+ determinant_A <- det(matrix_A)
+ print(paste("Determinant of Matrix A:", determinant_A))
+ } else {
+ print("Matrix A is not square and determinant cannot be computed.")
+}
[1] "Matrix A is not square and determinant cannot be computed."
>
> # Eigenvalues and Eigenvectors of Matrix A (if it's square)
> if (nrow(matrix_A) == ncol(matrix_A)) {
+ eigen_A <- eigen(matrix_A)
+ eigenvalues_A <- eigen_A$values
+ eigenvectors_A <- eigen_A$vectors
+ print("Eigenvalues of Matrix A:")
+ print(eigenvalues_A)
+ print("Eigenvectors of Matrix A:")
+ print(eigenvectors_A)
+ } else {
+ print("Matrix A is not square and does not have eigenvalues or eigenvectors.")
+}
[1] "Matrix A is not square and does not have eigenvalues or eigenvectors."
>
> # Matrix Concatenation (Horizontally)
> matrix_concatenated <- cbind(matrix_A, matrix_B)
> print("Matrix Concatenation (Horizontally):")
[1] "Matrix Concatenation (Horizontally):"
> print(matrix_concatenated)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 3 5 7 9 11
[2,] 2 4 6 8 10 12
>
> # Creating a new matrix "tmp"
> tmp <- matrix(c(10,-10,10), nc=3, nr=15)
>
> # Print the matrix "tmp"
> print("Matrix tmp:")
[1] "Matrix tmp:"
> print(tmp)
[,1] [,2] [,3]
[1,] 10 10 10
[2,] -10 -10 -10
[3,] 10 10 10
[4,] 10 10 10
[5,] -10 -10 -10
[6,] 10 10 10
[7,] 10 10 10
[8,] -10 -10 -10
[9,] 10 10 10
[10,] 10 10 10
[11,] -10 -10 -10
[12,] 10 10 10
[13,] 10 10 10
[14,] -10 -10 -10
[15,] 10 10 10
>
> # Find the transpose of the matrix "tmp"
> tmp_transpose <- t(tmp)
> print("Transpose of Matrix tmp:")
[1] "Transpose of Matrix tmp:"
> print(tmp_transpose)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] 10 -10 10 10 -10 10 10 -10 10 10 -10 10 10 -10 10
[2,] 10 -10 10 10 -10 10 10 -10 10 10 -10 10 10 -10 10
[3,] 10 -10 10 10 -10 10 10 -10 10 10 -10 10 10 -10 10
>
> # Find the product of Transpose of matrix "tmp" and original matrix "tmp"
> tmp_product <- tmp_transpose %*% tmp
> print("Product of Transpose of Matrix tmp and Matrix tmp:")
[1] "Product of Transpose of Matrix tmp and Matrix tmp:"
> print(tmp_product)
[,1] [,2] [,3]
[1,] 1500 1500 1500
[2,] 1500 1500 1500
[3,] 1500 1500 1500
>
> # Print the matrix with 0 entries
> matE <- matrix(0, nr = 6, nc = 6)
> print("Matrix with 0 entries:")
[1] "Matrix with 0 entries:"
> print(matE)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 0 0 0 0 0
[3,] 0 0 0 0 0 0
[4,] 0 0 0 0 0 0
[5,] 0 0 0 0 0 0
[6,] 0 0 0 0 0 0
>
> # Print the matrix with the values of column and row numbers
> matE <- abs(col(matE) - row(matE))
> print("Matrix with values of column and row numbers:")
[1] "Matrix with values of column and row numbers:"
> print(matE)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 1 2 3 4 5
[2,] 1 0 1 2 3 4
[3,] 2 1 0 1 2 3
[4,] 3 2 1 0 1 2
[5,] 4 3 2 1 0 1
[6,] 5 4 3 2 1 0
>
> # Intialize the values of row and col -1 to 1 using abs
> matE[abs(col(matE) - row(matE)) == 1] <- 1
> print("Matrix with initialized values:")
[1] "Matrix with initialized values:"
> print(matE)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 1 2 3 4 5
[2,] 1 0 1 2 3 4
[3,] 2 1 0 1 2 3
[4,] 3 2 1 0 1 2
[5,] 4 3 2 1 0 1
[6,] 5 4 3 2 1 0
4. Write a program to print the Sudoku in a 3*3 matrix game
with three numbers

# Fill a 3 x 3 grid with the numbers 1..3 so that, as far as the greedy
# scan below allows, no number repeats within a row or a column.
# A 0 marks a cell that has not been filled yet.

# Start from an all-zero grid and show it
sudoku <- matrix(0, nrow = 3, ncol = 3)
print(sudoku)

# Pre-fill the main diagonal with the given numbers
diag(sudoku) <- c(3, 2, 1)

# TRUE when `value` does not yet occur in the given row or column of the
# global `sudoku` grid.
is_valid <- function(row, col, value) {
  used_in_row <- value %in% sudoku[row, ]
  used_in_col <- value %in% sudoku[, col]
  !(used_in_row || used_in_col)
}

# Visit the cells row by row; each empty cell receives the smallest number
# that is_valid() accepts (cells with no acceptable number stay 0).
for (row in seq_len(3)) {
  for (col in seq_len(3)) {
    if (sudoku[row, col] != 0) {
      next
    }
    first_fit <- Position(function(candidate) is_valid(row, col, candidate), 1:3)
    if (!is.na(first_fit)) {
      sudoku[row, col] <- first_fit
    }
  }
}

print(sudoku)

OUTPUT:

# Assign a matrix with zero entries


> sudoku <- matrix(0, nrow = 3, ncol = 3)
> print(sudoku)
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
>
> sudoku[1, 1] <- 3
> sudoku[2, 2] <- 2
> sudoku[3, 3] <- 1
>
> is_valid <- function(row, col, value) {
+ if (value %in% sudoku[row, ] || value %in% sudoku[, col]) {
+ return(FALSE)
+ }
+ return(TRUE)
+ }
>
> for (row in 1:3) {
+ for (col in 1:3) {
+ if (sudoku[row, col] == 0) {
+ for (value in 1:3) {
+ if (is_valid(row, col, value)) {
+ sudoku[row, col] <- value
+ break
+ }
+ }
+ }
+ }
+ }
>
> print(sudoku)
[,1] [,2] [,3]
[1,] 3 1 2
[2,] 1 2 3
[3,] 2 3 1

>

5. A. Write an R program to perform following operations on Dataframes

1. Write a R program to create an empty data frame


Part 5.A: Create an empty dataframe

# Create an empty dataframe (zero rows) whose five columns each have a
# specified type.
df <- data.frame(
  Ints = integer(),
  Doubles = double(),
  Characters = character(),
  Logicals = logical(),
  Factors = factor(),
  stringsAsFactors = FALSE
)

# Print the structure of the empty dataframe
print("Structure of the empty dataframe:")
# str() writes the structure itself and returns NULL invisibly; wrapping it
# in print() would add a stray "NULL" line to the output.
str(df)

OUTPUT:

# Create an empty dataframe with specified column types


> df <- data.frame(
+ Ints = integer(),
+ Doubles = double(),
+ Characters = character(),
+ Logicals = logical(),
+ Factors = factor(),
+ stringsAsFactors = FALSE
+ )
>
> # Print the structure of the empty dataframe
> print("Structure of the empty dataframe:")
[1] "Structure of the empty dataframe:"
> print(str(df))
'data.frame': 0 obs. of 5 variables:
$ Ints : int
$ Doubles : num
$ Characters: chr
$ Logicals : logi
$ Factors : Factor w/ 0 levels:
NULL

Part 5.B: Create a dataframe from given vectors

# Build a data frame out of four parallel vectors (one entry per student).

# Source vectors, all of length 10
name <- c(
  'Anastasia', 'Dima', 'Katherine', 'James', 'Emily',
  'Michael', 'Matthew', 'Laura', 'Kevin', 'Jonas'
)
score <- c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19)
attempts <- c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1)
qualify <- c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes')

# Show the raw vectors, in column order, under the heading
print("Original data frame:")
for (column_values in list(name, score, attempts, qualify)) {
  print(column_values)
}

# Combine the vectors; each column is named after its vector
df <- data.frame(
  name = name,
  score = score,
  attempts = attempts,
  qualify = qualify
)

print(df)

OUTPUT:

# Create a dataframe from given vectors


> name <- c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael',
'Matthew', 'Laura', 'Kevin', 'Jonas')
> score <- c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19)
> attempts <- c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1)
> qualify <- c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', '
yes')
>
> print("Original data frame:")
[1] "Original data frame:"
> print(name)
[1] "Anastasia" "Dima" "Katherine" "James" "Emily" "Michael"
"Matthew"
[8] "Laura" "Kevin" "Jonas"
> print(score)
[1] 12.5 9.0 16.5 12.0 9.0 20.0 14.5 13.5 8.0 19.0
> print(attempts)
[1] 1 3 2 3 2 3 1 1 2 1
> print(qualify)
[1] "yes" "no" "yes" "no" "no" "yes" "yes" "no" "no" "yes"
>
> df <- data.frame(name, score, attempts, qualify)
> print(df)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes

Part 5.C: Get the structure of a given dataframe

# Show a data frame of exam results and then its structure.

# Given dataframe: one row per student
exam_data <- data.frame(
  name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura',
           'Kevin', 'Jonas'),
  score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
  attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
  qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes')
)

# Print original dataframe and its structure
print("Original dataframe:")
print(exam_data)
print("Structure of the said data frame:")
# str() writes the structure itself and returns NULL invisibly; wrapping it
# in print() would add a stray "NULL" line to the output.
str(exam_data)

OUTPUT:

# Given dataframe
> exam_data <- data.frame(
+ name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael'
, 'Matthew', 'Laura', 'Kevin', 'Jonas'),
+ score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
+ attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
+ qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes')
+ )
>
> # Print original dataframe and its structure
> print("Original dataframe:")
[1] "Original dataframe:"
> print(exam_data)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes
> print("Structure of the said data frame:")
[1] "Structure of the said data frame:"
> print(str(exam_data))
'data.frame': 10 obs. of 4 variables:
$ name : chr "Anastasia" "Dima" "Katherine" "James" ...
$ score : num 12.5 9 16.5 12 9 20 14.5 13.5 8 19
$ attempts: num 1 3 2 3 2 3 1 1 2 1
$ qualify : chr "yes" "no" "yes" "no" ...
NULL

Part 5.D: Reorder a given dataframe by column name

# Reorder the columns of a data frame by naming them in the desired order.

# Given dataframe: one row per student
exam_data <- data.frame(
  name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael', 'Matthew', 'Laura',
           'Kevin', 'Jonas'),
  score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
  attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
  qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes')
) # closing parenthesis of data.frame(); without it the script does not parse

# Print original dataframe
print("Original dataframe:")
print(exam_data)

# Reorder dataframe by column name: indexing with a character vector
# returns the columns in exactly the order listed.
exam_data <- exam_data[c("name", "attempts", "score", "qualify")]
print("Reorder by column name:")
print(exam_data)

OUTPUT:
# Given dataframe
> exam_data <- data.frame(
+ name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael'
, 'Matthew', 'Laura', 'Kevin', 'Jonas'),
+ score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
+ attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
+ qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes')
+ )
>
> # Print original dataframe
> print("Original dataframe:")
[1] "Original dataframe:"
> print(exam_data)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes
>
> # Reorder dataframe by column name
> exam_data <- exam_data[c("name", "attempts", "score", "qualify")]
> print("Reorder by column name:")
[1] "Reorder by column name:"
> print(exam_data)
name attempts score qualify
1 Anastasia 1 12.5 yes
2 Dima 3 9.0 no
3 Katherine 2 16.5 yes
4 James 3 12.0 no
5 Emily 2 9.0 no
6 Michael 3 20.0 yes
7 Matthew 1 14.5 yes
8 Laura 1 13.5 no
9 Kevin 2 8.0 no
10 Jonas 1 19.0 yes
Part 5.E: Find elements present in two dataframes

# Find the elements that two character vectors have in common.

# Input vectors
a <- c("a", "b", "c", "d", "e")
b <- c("d", "e", "f", "g")

# Show both inputs
print("Original Dataframes:")
for (input_vector in list(a, b)) {
  print(input_vector)
}

# Fold intersect() over the inputs to keep only the shared elements
result <- Reduce(intersect, list(a, b))
print("Elements which are present in both dataframes:")
print(result)

OUTPUT:
# Given dataframes
> a <- c("a", "b", "c", "d", "e")
> b <- c("d", "e", "f", "g")
>
> print("Original Dataframes:")
[1] "Original Dataframes:"
> print(a)
[1] "a" "b" "c" "d" "e"
> print(b)
[1] "d" "e" "f" "g"
>
> # Find elements present in both dataframes
> result <- intersect(a, b)
> print("Elements which are present in both dataframes:")
[1] "Elements which are present in both dataframes:"
> print(result)
[1] "d" "e"

Part 5.F: Call the air quality dataset, check whether it is a


dataframe, and order it by the first and second column
# Load the built-in airquality dataset, confirm that it is a data frame and
# sort its rows by the first column, breaking ties with the second column.

# Take a working copy of the dataset
data <- airquality

# Report the class and preview the first ten rows
print("Original data: Daily air quality measurements in New York, May to September 1973.")
print(class(data))
print(head(data, 10))

# Compute the row permutation first, then apply it to the whole data frame
row_order <- order(data[[1]], data[[2]])
result <- data[row_order, ]
print("Order the entire data frame by the first and second column:")
print(result)

OUTPUT:

# Call the air quality dataset


> data <- airquality
>
> # Check whether it is a dataframe
> print("Original data: Daily air quality measurements in New York, May to
September 1973.")
[1] "Original data: Daily air quality measurements in New York, May to Sep
tember 1973."
> print(class(data))
[1] "data.frame"
> print(head(data, 10))
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
4 18 313 11.5 62 5 4
5 NA NA 14.3 56 5 5
6 28 NA 14.9 66 5 6
7 23 299 8.6 65 5 7
8 19 99 13.8 59 5 8
9 8 19 20.1 61 5 9
10 NA 194 8.6 69 5 10
>
> # Order the entire data frame by the first and second column
> result <- data[order(data[,1], data[,2]),]
> print("Order the entire data frame by the first and second column:")
[1] "Order the entire data frame by the first and second column:"
> print(result)
Ozone Solar.R Wind Temp Month Day
21 1 8 9.7 59 5 21
23 4 25 9.7 61 5 23
18 6 78 18.4 57 5 18
76 7 48 14.3 80 7 15
147 7 49 10.3 69 9 24
11 7 NA 6.9 74 5 11
9 8 19 20.1 61 5 9
94 9 24 13.8 81 8 2
137 9 24 10.9 71 9 14
114 9 36 14.3 72 8 22
73 10 264 14.3 73 7 12
20 11 44 9.7 62 5 20
13 11 290 9.2 66 5 13
22 11 320 16.6 73 5 22
50 12 120 11.5 73 6 19
3 12 149 12.6 74 5 3
141 13 27 10.3 76 9 18
138 13 112 11.5 71 9 15
51 13 137 10.3 76 6 20
144 13 238 12.6 64 9 21
148 14 20 16.6 63 9 25
151 14 191 14.3 75 9 28
14 14 274 10.9 68 5 14
16 14 334 11.5 64 5 16
82 16 7 6.9 74 7 21
95 16 77 7.4 82 8 3
143 16 201 8.0 82 9 20
12 16 256 9.7 69 5 12
15 18 65 13.2 58 5 15
152 18 131 8.0 76 9 29
140 18 224 13.8 67 9 17
4 18 313 11.5 62 5 4
8 19 99 13.8 59 5 8
49 20 37 9.2 65 6 18
87 20 81 8.6 82 7 26
153 20 223 11.5 68 9 30
130 20 252 10.9 80 9 7
47 21 191 14.9 77 6 16
132 21 230 10.9 75 9 9
113 21 259 15.5 77 8 21
135 21 259 15.5 76 9 12
108 22 71 10.3 77 8 16
28 23 13 12.0 67 5 28
145 23 14 9.2 71 9 22
110 23 115 7.4 76 8 18
44 23 148 8.0 82 6 13
131 23 220 10.3 78 9 8
7 23 299 8.6 65 5 7
142 24 238 10.3 68 9 19
133 24 259 9.7 73 9 10
74 27 175 14.9 81 7 13
136 28 238 6.3 77 9 13
105 28 273 11.5 82 8 13
6 28 NA 14.9 66 5 6
38 29 127 9.7 82 6 7
149 30 193 6.9 70 9 26
19 30 322 11.5 68 5 19
111 31 244 10.9 78 8 19
24 32 92 12.0 61 5 24
129 32 92 15.5 84 9 6
64 32 236 9.2 81 7 3
17 34 307 12.0 66 5 17
78 35 274 10.3 82 7 17
97 35 NA 7.4 85 8 5
2 36 118 8.0 72 5 2
146 36 139 10.3 81 9 23
31 37 279 7.4 76 5 31
48 37 284 20.7 72 6 17
93 39 83 6.9 81 8 1
41 39 323 11.5 87 6 10
67 40 314 10.9 83 7 6
1 41 190 7.4 67 5 1
112 44 190 10.3 78 8 20
104 44 192 11.5 86 8 12
134 44 236 14.9 81 9 11
116 45 212 9.7 79 8 24
29 45 252 14.9 81 5 29
139 46 237 6.9 78 9 16
128 47 95 7.4 87 9 5
77 48 260 6.9 81 7 16
63 49 248 9.2 85 7 2
90 50 275 7.4 86 7 29
88 52 82 12.0 86 7 27
109 59 51 6.3 79 8 17
92 59 254 9.2 81 7 31
79 61 285 6.3 84 7 18
81 63 220 11.5 85 7 20
66 64 175 4.6 83 7 5
91 64 253 7.4 83 7 30
106 65 157 9.7 80 8 14
98 66 NA 4.6 87 8 6
40 71 291 13.8 90 6 9
126 73 183 2.8 93 9 3
118 73 215 8.0 86 8 26
120 76 203 9.7 97 8 28
68 77 276 5.1 88 7 7
125 78 197 5.1 92 9 2
96 78 NA 6.9 86 8 4
80 79 187 5.1 87 7 19
85 80 294 8.6 86 7 24
89 82 213 7.4 88 7 28
122 84 237 6.3 96 8 30
71 85 175 7.4 89 7 10
123 85 188 6.3 94 8 31
100 89 229 10.3 90 8 8
127 91 189 4.6 93 9 4
124 96 167 6.9 91 9 1
69 97 267 6.3 92 7 8
70 97 272 5.7 92 7 9
86 108 223 8.0 85 7 25
101 110 207 8.0 90 8 9
30 115 223 5.7 79 5 30
121 118 225 2.3 94 8 29
99 122 255 4.0 89 8 7
62 135 269 4.1 84 7 1
117 168 238 3.4 81 8 25
60 NA 31 14.9 77 6 29
58 NA 47 10.3 73 6 27
53 NA 59 1.7 76 6 22
107 NA 64 11.5 79 8 15
25 NA 66 16.6 57 5 25
54 NA 91 4.6 76 6 23
59 NA 98 11.5 80 6 28
65 NA 101 10.9 84 7 4
57 NA 127 8.0 78 6 26
56 NA 135 8.0 75 6 25
103 NA 137 11.5 86 8 11
61 NA 138 8.0 83 6 30
72 NA 139 8.6 82 7 11
150 NA 145 13.2 77 9 27
52 NA 150 6.3 77 6 21
119 NA 153 5.7 88 8 27
35 NA 186 9.2 84 6 4
10 NA 194 8.6 69 5 10
36 NA 220 8.6 85 6 5
102 NA 222 8.6 92 8 10
34 NA 242 16.1 67 6 3
43 NA 250 9.2 92 6 12
55 NA 250 6.3 76 6 24
115 NA 255 12.6 75 8 23
83 NA 258 9.7 81 7 22
42 NA 259 10.9 93 6 11
37 NA 264 14.3 79 6 6
26 NA 266 14.9 58 5 26
39 NA 273 6.9 87 6 8
32 NA 286 8.6 78 6 1
33 NA 287 9.7 74 6 2
75 NA 291 14.9 91 7 14
84 NA 295 11.5 82 7 23
46 NA 322 11.5 79 6 15
45 NA 332 13.8 80 6 14
5 NA NA 14.3 56 5 5
27 NA NA 8.0 57 5 27

6. Write an R program to Toss 2 coins using a sample


function that demonstrates probability distribution
for 100 trials.

# Simulate tossing two coins repeatedly and show how often each of the four
# outcomes occurs, as a frequency table, a line graph and a bar graph.

# The four possible outcomes of tossing two coins
coin <- c("HH", "HT", "TH", "TT")
n_trials <- 1000

# Draw one outcome per trial in a single vectorised call. Drawing two values
# per trial and storing them in one element triggers a "number of items to
# replace" warning and silently discards the second value.
result <- sample(coin, size = n_trials, replace = TRUE)

# Frequency of each outcome, as a table and as a data frame with the
# columns `result` (outcome) and `Freq` (count)
flips <- table(result)
print(flips)
result1 <- as.data.frame(flips)
print(result1)

# Plot Line graph
# The outcomes are categories, so they are plotted at positions 1..k and
# labelled afterwards; passing the factor itself as x would make plot()
# dispatch to a box plot instead of drawing a line.
outcome_pos <- seq_along(result1$Freq)
plot(outcome_pos, result1$Freq,
     type = "l",                      # line plot
     lwd = 2,                         # width of line
     col = "red",                     # color of line
     las = 1,                         # orientation of tick-mark labels
     ylim = c(0, max(result1$Freq)),  # range of y-axis
     xaxt = "n",                      # x-axis is drawn below with labels
     xlab = "outcome",                # x-axis label
     ylab = "frequency")              # y-axis label
axis(side = 1, at = outcome_pos, labels = as.character(result1$result))
# Reference line: expected count per outcome if all are equally likely
abline(h = n_trials / length(coin), col = "gray50")

# Plot Bar Graph
barplot(result1$Freq, names.arg = result1$result, col = "red")

OUTPUT:
> flips <- table(result)
> flips
result
HH HT TH TT
241 259 256 244
> result1 <- as.data.frame(flips)
> result1
result Freq
1 HH 241
2 HT 259
3 TH 256
4 TT 244
> # Plot Line graph
> plot(result1$result, result1$Freq,
+ type = '2', # line type
+ lwd = 2, # width of line
+ col = "red",# color of line
+ las = 1, # orientation of tick-mark labels
+ ylim = c(50, 1000), # range of y-axis
+ xlab = "number of tosses", # x-axis label
+ ylab = "relative frequency") # y-axis label
> abline(h = 0.5, col = 'gray50')
>
> # Plot Bar Graph
> barplot(result1$Freq, names.arg = result1$result, col = "red")

>
7. Find the 25th percentile (0.25 quantile) of a binomial distribution with 25
trials and probability of success on each trial = 0.5

# Quantiles of, and random draws from, a Binomial(size = 25, prob = 0.5)
# distribution.

# 0.25 quantile: smallest x with P(X <= x) >= 0.25
quantile_25 <- qbinom(0.25, size = 25, prob = 0.5)
print(quantile_25)

# Same probability taken from the upper tail: with lower.tail = FALSE the
# 0.25 is read as P(X > x), so this is the 0.75 quantile.
quantile_complement <- qbinom(0.25, size = 25, prob = 0.5, lower.tail = FALSE)
print(quantile_complement)

# Generating 10 random observations from the same binomial distribution
# (actually drawn with rbinom() rather than typed in by hand; call
# set.seed() beforehand if reproducible values are needed)
random_values <- rbinom(10, size = 25, prob = 0.5)
print(random_values)
OUTPUT :

> # Finding the 25th quantile of a binomial distribution


> quantile_25 <- qbinom(0.25, 25, 0.5)
> print(quantile_25)
[1] 11
>
> # Binomial quantile for the probability 1-0.4
> quantile_complement <- qbinom(0.25, 25, 0.5, lower.tail = FALSE)
> print(quantile_complement)
[1] 14
>
> # Generating random observations from a binomial distribution
> random_values <- c(8, 14, 10, 12, 10, 14, 16, 7, 13, 12)
> print(random_values)
[1] 8 14 10 12 10 14 16 7 13 12

8.A. Bob succeeds in 60% of his free-throw attempts. If he shoots
12 free throws, what is the probability that he achieves exactly 10
successes?

# Find the probability of exactly 10 successes in 12 trials where the
# probability of success on each trial is 0.6, i.e. P(X = 10) for
# X ~ Binomial(12, 0.6).
# The value is stored and printed explicitly: a bare expression is only
# echoed at the console, not when the script is run through source().
prob_10_of_12 <- dbinom(x = 10, size = 12, prob = 0.6)
print(prob_10_of_12)

OUTPUT :
> #find the probability of 10 successes during 12 trials where the probabi
lity of
> #success on each trial is 0.6
> dbinom(x=10, size=12, prob=.6)
[1] 0.06385228

8.B. Subha flips a fair coin 20 times. What is the probability that the
coin lands on heads exactly 7 times?

# P(X = 7) for X ~ Binomial(20, 0.5): exactly 7 heads in 20 fair coin flips
n_flips <- 20
n_heads <- 7
probability <- dbinom(n_heads, size = n_flips, prob = 0.5)
print(probability)
OUTPUT :
> # Probability of achieving exactly 7 heads in 20 coin flips
> probability <- dbinom(x = 7, size = 20, prob = 0.5)
> print(probability)
[1] 0.07392883

8.C. A hospital database shows that 65% of the patients suffering from
cancer recover from it. What is the probability that, of 5
randomly chosen patients, exactly 3 will recover?

# P(X = 3) for X ~ Binomial(5, 0.65): exactly 3 of 5 patients recover.
# Stored and printed explicitly so the value is also shown when the script
# is run through source(), where bare expressions are not echoed.
prob_3_of_5 <- dbinom(3, size = 5, prob = 0.65)
print(prob_3_of_5)

OUTPUT :
> dbinom(3, size=5, prob=0.65)
[1] 0.3364156

8.D. A bowler takes a wicket on 20% of his attempts when he
bowls. If he bowls 5 times, what is the probability that he
takes 4 or fewer wickets?

# P(X <= 4) for X ~ Binomial(5, 0.2): at most 4 wickets in 5 attempts.
# pbinom() gives the cumulative (lower-tail) probability.
# Stored and printed explicitly so the value is also shown when the script
# is run through source(), where bare expressions are not echoed.
prob_at_most_4 <- pbinom(4, size = 5, prob = 0.2)
print(prob_at_most_4)

OUTPUT:
> pbinom(4, size=5, prob=.2)
[1] 0.99968
8. E. Suppose you have a large population of students that’s 50%
female. If students are assigned to classrooms at random, and you
visit 100 classrooms with 20 students each, then how many girls
might you expect to see in each classroom?

# Simulate the number of girls in each of 100 classrooms of 20 students,
# where every student is female with probability 0.5: 100 random draws
# from Binomial(20, 0.5).
# Stored and printed explicitly so the values are also shown when the script
# is run through source(), where bare expressions are not echoed.
girls_per_classroom <- rbinom(100, size = 20, prob = 0.5)
print(girls_per_classroom)

OUTPUT:
> rbinom(100,20,0.5)
[1] 14 14 7 10 9 8 10 12 11 11 12 9 8 14 9 9 7 10 10 8 8
8 7
[24] 11 13 10 10 7 11 10 8 10 11 4 10 7 11 10 12 7 7 9 9 10
13 13
[47] 8 8 12 6 11 11 10 7 6 13 9 9 10 10 13 11 8 10 9 10 9
6 13
[70] 12 7 6 14 9 12 8 9 12 12 8 6 8 10 12 10 10 13 9 10 12
5 4
[93] 8 10 13 14 8 10 8 11

9. Write a function in R that randomly draws five cards from a deck of cards

# Build a standard 52-card deck as a data frame and draw random cards from it.

# The four suits and the thirteen faces with their numeric values
suit <- c("clubs", "diamonds", "hearts", "spades")
face <- c("king", "queen", "jack", "ten", "nine", "eight", "seven", "six",
          "five", "four", "three", "two", "ace")
num <- c(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)

# One row per card: the 13 faces are repeated once for each suit block
# (rows 1-13 spades, 14-26 clubs, 27-39 diamonds, 40-52 hearts).
deck <- data.frame(
  face = rep(face, 4),
  suit = rep(c("spades", "clubs", "diamonds", "hearts"), each = 13),
  num = rep(num, 4)
) # closing parenthesis of data.frame(); without it the script does not parse

# Draw `n` distinct cards at random.
#   deck: data frame with one row per card
#   n:    number of cards to draw (sample() raises an error if n exceeds the
#         number of rows, because cards are drawn without replacement)
# Returns the selected rows of `deck` as a data frame.
draw_n_random_cards <- function(deck, n) {
  # drop = FALSE keeps the result a data frame whatever the shape of `deck`
  deck[sample(nrow(deck), n), , drop = FALSE]
} # closing brace of the function body, missing in the listing

result <- draw_n_random_cards(deck, 5)

print(result)
OUTPUT :
> suit <- c("clubs", "diamonds", "hearts", "spades")
> face <- c("king", "queen", "jack", "ten", "nine", "eight", "seven", "six
", "five", "four", "three", "two", "ace")
> num <- c(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
> deck <- data.frame(
+ face = rep(face, 4),
+ suit = c(rep("spades", 13), rep("clubs", 13), rep("diamonds", 13), rep
("hearts", 13)),
+ num = rep(num, 4)
+ )
>
> draw_n_random_cards <- function(deck, n) {
+ deck[sample(nrow(deck), n), ]
+ }
>
> result <- draw_n_random_cards(deck, 5)
> print(result)
face suit num
28 queen diamonds 12
51 two hearts 2
34 six diamonds 6
7 seven spades 7
4 ten spades 10

10.A. Find the value of the standard normal distribution pdf at x=0
# Density (pdf) of the normal distribution at selected points.
# Each value is stored and printed explicitly so it is also shown when the
# script is run through source(), where bare expressions are not echoed.

# Standard normal pdf at x = 0, with mean and sd given explicitly
pdf_std_explicit <- dnorm(x = 0, mean = 0, sd = 1)
print(pdf_std_explicit)
# [1] 0.3989423

# By default, dnorm() uses mean = 0 and sd = 1, so this is the same value
pdf_std_default <- dnorm(x = 0)
print(pdf_std_default)
# [1] 0.3989423

# Find the value of the normal distribution pdf at x = 10 with mean = 20
# and sd = 5
pdf_at_10 <- dnorm(x = 10, mean = 20, sd = 5)
print(pdf_at_10)

OUTPUT:
> dnorm(x=0, mean=0, sd=1)
[1] 0.3989423
> # [1] 0.3989423
> # by default, R uses mean=0 and sd=1
> dnorm(x=0)
[1] 0.3989423
> # [1] 0.3989423
> #find the value of the normal distribution pdf at x=10 with mean=2
0 and sd=5
> dnorm(x=10, mean=20, sd=5)
[1] 0.01079819

10.B. Suppose the height of males at a certain school is normally distributed with
a mean of μ = 70 inches and a standard deviation of σ = 2 inches. Approximately
what percentage of males at this school are taller than 74 inches?
# Share of males taller than 74 inches when height is normal with
# mean = 70 and sd = 2; lower.tail = FALSE requests the upper-tail area
pnorm(q = 74, mean = 70, sd = 2, lower.tail = FALSE)

OUTPUT:
> #find the percentage of males that are taller than 74 inches in a
population with
> #mean = 70 and sd = 2
> pnorm(74, mean=70,
+ sd=2, lower.tail=FALSE)
[1] 0.02275013

10.C. Suppose the weight of a certain species of otters is normally distributed with
a mean of μ = 30 lbs and a standard deviation of σ = 5 lbs. Approximately what
percentage of this species of otters weigh less than 22 lbs?

# Share of otters weighing less than 22 lbs when weight is normal with
# mean = 30 and sd = 5 (lower-tail area, the default)
pnorm(q = 22, mean = 30, sd = 5)

OUTPUT:
> #find percentage of otters that weight less than 22 lbs in a popul
ation with
> #mean = 30 and sd = 5
> pnorm(22, mean=30, sd=5)
[1] 0.05479929

10.D. Suppose the height of plants in a certain region is normally distributed with
a mean of μ = 13 inches and a standard deviation of σ = 2 inches. Approximately
what percentage of plants in this region are between 10 and 14 inches tall?

# Share of plants between 10 and 14 inches tall for a population with
# mean = 13 and sd = 2: evaluate the cumulative probability at both
# bounds, then take the difference (area below 14 minus area below 10)
diff(pnorm(c(10, 14), mean = 13, sd = 2))

OUTPUT :

> #find percentage of plants that are less than 14 inches tall, then
subtract the
> #percentage of plants that are less than 10 inches tall, based on
a population
> #with mean = 13 and sd = 2
> pnorm(14, mean=13, sd=2) - pnorm(10, mean=13, sd=2)
[1] 0.6246553
10.E. Suppose that you have a machine that packages rice inside boxes. The
process follows a Normal distribution and it is known that the mean of the
weight of each box is 1000 grams and the standard deviation is 10 grams.
What is the probability of a box weighing exactly 950 grams (that is, the
value of the density function at 950 grams)?

# Value of the normal density at a box weight of 950 grams
dnorm(x = 950, mean = 1000, sd = 10)

# What is the probability of a box weighing more than 980 grams?
pnorm(q = 980, mean = 1000, sd = 10, lower.tail = FALSE)

# Calculate the quantile for probability 0.5 for the above scenario.
qnorm(p = 0.5, mean = 1000, sd = 10)

# Simulate the above scenario for 10 observations.
rnorm(n = 10, mean = 1000, sd = 10)

OUTPUT:
> dnorm( 950, 1000,10)
[1] 1.48672e-07
> #What is the probability of a box weighing more than 980 grams?
> pnorm(980,1000,10,lower.tail=FALSE)
[1] 0.9772499
> #Calculate the quantile for probability 0.5 for the above scenario.
> qnorm(0.5,1000,10)
[1] 1000
> #Simulate the above scenario for 10 observations.
> rnorm(10,1000,10)
[1] 1011.0971 998.3551 1006.1722 997.9489 1020.7643 1006.1655 1002.1544
[8] 1000.9453 998.5481 991.4324

11. Apply Linear Regression Technique


1. derive a model
2. test the data
3. Calculate the Predictive error for the following
data

Steps to execute this program :

Step 1: Open MS Excel and create a sheet with Experience and Salary columns, as given below.
Step 2: Click File > Save As and give the file name salary_data.csv.
Then change "Save as type" to CSV (Comma delimited),
as shown in the figure.

Step 3: open R studio type the program

# Define the path to your CSV file (replace with the actual location)
csv_file_path <- "C:/Users/MOIN PASHA/OneDrive/Desktop/salary_data.csv"

# Stop early with a clear message when the file is not at that location
if (!file.exists(csv_file_path)) {
  stop("CSV file not found: ", csv_file_path, call. = FALSE)
}

# Read the salary and experience data from the CSV file
data <- read.csv(csv_file_path)

# Function to predict salary based on experience
#
# Fits a linear regression of Salary on Experience using the data frame
# `data` defined outside the function, then predicts from that model.
#
# experience: numeric value(s) of experience to predict for
# Returns the value(s) produced by predict() for `experience`.
predict_salary <- function(experience) {
  # Create the linear regression model inside the function.
  # Ensure "Salary" and "Experience" match your column names.
  model <- lm(Salary ~ Experience, data = data)

  # Predict salary for the given experience
  predict(model, newdata = data.frame(Experience = experience))
}

# Call the function to create the model and get predicted salary for 11 years
predicted_salary_11 <- predict_salary(11)

# Test the model with experience of 11 years: look up the recorded salary
# for that experience and take the absolute difference from the prediction
actual_salary_11 <- data[data$Experience == 11, "Salary"]
prediction_error_11 <- abs(actual_salary_11 - predicted_salary_11)

# Test the model with experience of 8 years (similar process)
predicted_salary_8 <- predict_salary(8)
actual_salary_8 <- data[data$Experience == 8, "Salary"]
prediction_error_8 <- abs(actual_salary_8 - predicted_salary_8)

# Print the results for 11 years
cat("Predicted Salary for 11 years experience:", predicted_salary_11, "\n")
# "Salary" is assumed to be the column name for the actual salary
cat("Actual Salary for 11 years experience:", actual_salary_11, "\n")
cat("Prediction Error for 11 years experience:", prediction_error_11, "\n\n")

# Print the results for 8 years
cat("Predicted Salary for 8 years experience:", predicted_salary_8, "\n")
cat("Actual Salary for 8 years experience:", actual_salary_8, "\n")
cat("Prediction Error for 8 years experience:", prediction_error_8, "\n\n")

# Print additional messages about successful file reading or errors (optional).
# Checking for a non-empty data frame also catches the case where `data` was
# never assigned and still refers to something other than the loaded table.
if (is.data.frame(data) && nrow(data) > 0) {
  cat("Salary and experience data loaded successfully from the CSV file.\n")
} else {
  cat("Error: Could not read data from the CSV file. Please check the file path.\n")
}

Step 4: Go to the folder where you saved salary_data.csv and copy its path.

Step 5: Paste the copied path into the program.

Step 6: Change each backslash (\) in the path to a forward slash (/).

OUTPUT:
# Define the path to your CSV file (replace with the actual location)
> csv_file_path <- "C:/Users/MOIN PASHA/OneDrive/Desktop/salary_data.csv"
> # Read the salary and experience data from the CSV file
> data <- read.csv(csv_file_path)
>
> # Function to predict salary based on experience
> predict_salary <- function(experience) {
+ # Create the linear regression model inside the function
+ model <- lm(Salary ~ Experience, data = data) # Ensure "Salary" match
es your column name
+
+ # Predict salary for the given experience
+ predicted_salary <- predict(model, newdata = data.frame(Experience = e
xperience))
+ return(predicted_salary)
+ }
>
> # Call the function to create the model and get predicted salary for 11
years
> predicted_salary_11 <- predict_salary(11)
>
> # Test the model with experience of 11 years
> actual_salary_11 <- data[data$Experience == 11, "Salary"]
> prediction_error_11 <- abs(actual_salary_11 - predicted_salary_11)
>
> # Test the model with experience of 8 years (similar process)
> predicted_salary_8 <- predict_salary(8)
> actual_salary_8 <- data[data$Experience == 8, "Salary"]
> prediction_error_8 <- abs(actual_salary_8 - predicted_salary_8)
>
> # Print the results for 11 years
> cat("Predicted Salary for 11 years experience:", predicted_salary_11, "\
n")
Predicted Salary for 11 years experience: 62.1212
> cat("Actual Salary for 11 years experience:", actual_salary_11, "\n") #
Assuming "Salary" is the column name for actual salary
Actual Salary for 11 years experience: 59
> cat("Prediction Error for 11 years experience:", prediction_error_11, "\
n\n")
Prediction Error for 11 years experience: 3.121204

>
> # Print the results for 8 years
> cat("Predicted Salary for 8 years experience:", predicted_salary_8, "\n"
)
Predicted Salary for 8 years experience: 51.50878
> cat("Actual Salary for 8 years experience:", actual_salary_8, "\n")
Actual Salary for 8 years experience: 57
> cat("Prediction Error for 8 years experience:", prediction_error_8, "\n\
n")
Prediction Error for 8 years experience: 5.491223

>
> # Print additional messages about successful file reading or errors (opt
ional)
> if (!is.null(data)) {
+ cat("Salary and experience data loaded successfully from the CSV file.
\n")
+ } else {
+ cat("Error: Could not read data from the CSV file. Please check the fi
le path.\n")
+ }
Salary and experience data loaded successfully from the CSV file.

12. Demonstrate Multilinear Regression for mtcars

# Multiple linear regression of mpg on disp, hp and wt for the mtcars data.

# Load the necessary dataset
data(mtcars)

# Extract the relevant columns for analysis
input <- mtcars[, c("mpg", "disp", "hp", "wt")]
print(head(input))

# Create the relationship model
model <- lm(mpg ~ disp + hp + wt, data = input)
# Show the model
print(model)

# Get the Intercept and coefficients as vector elements.
# Selecting by name keeps each value tied to its predictor even if the
# formula's term order changes.
cat("# # # # The Coefficient Values # # # ", "\n")
coefficients_all <- coef(model)
a <- coefficients_all["(Intercept)"]
print(a)
Xdisp <- coefficients_all["disp"]
Xhp <- coefficients_all["hp"]
Xwt <- coefficients_all["wt"]

print(Xdisp)
print(Xhp)
print(Xwt)

# Note: Statistical Inference
# Based on the above intercept and coefficient values, we create the
# mathematical equation.
# Y = a + Xdisp*x1 + Xhp*x2 + Xwt*x3
# or
# Y = 37.10551 + (-0.000937)*x1 + (-0.03115655)*x2 + (-3.800891)*x3

OUTPUT:
> # Load the necessary dataset
> data(mtcars)
>
> # Extract the relevant columns for analysis
> input <- mtcars[, c("mpg", "disp", "hp", "wt")]
> print(head(input))
mpg disp hp wt
Mazda RX4 21.0 160 110 2.620
Mazda RX4 Wag 21.0 160 110 2.875
Datsun 710 22.8 108 93 2.320
Hornet 4 Drive 21.4 258 110 3.215
Hornet Sportabout 18.7 360 175 3.440
Valiant 18.1 225 105 3.460
>
> # Create the relationship model
> model <- lm(mpg ~ disp + hp + wt, data = input)
>
> # Show the model
> print(model)

Call:
lm(formula = mpg ~ disp + hp + wt, data = input)

Coefficients:
(Intercept) disp hp wt
37.105505 -0.000937 -0.031157 -3.800891

>
> # Get the Intercept and coefficients as vector elements
> cat("# # # # The Coefficient Values # # # ", "\n")
# # # # The Coefficient Values # # #
> a <- coef(model)[1]
> print(a)
(Intercept)
37.10551
> Xdisp <- coef(model)[2]
> Xhp <- coef(model)[3]
> Xwt <- coef(model)[4]
>
> print(Xdisp)
disp
-0.0009370091
> print(Xhp)
hp
-0.03115655
> print(Xwt)
wt
-3.800891
>
> # Note: Statistical Inference
> # Based on the above intercept and coefficient values, we create the mat
hematical equation.
> # Y = a + Xdisp*x1 + Xhp*x2 + Xwt*x3
> # or
> # Y = 37.10551 + (-0.000937)*x1 + (-0.03115655)*x2 + (-3.800891)*x3

13. Demonstrate Sampling and Sampling Distribution using Iris Data set

# Sampling distribution of the mean petal length in the iris data set.

# Load the necessary dataset and view its structure
str(iris)

# Convert the iris dataset to a data frame
iris_df <- data.frame(iris)
# View() opens a data viewer window, so only call it in an interactive
# session (for example RStudio); a batch run skips it
if (interactive()) {
  View(iris_df)
}

# Set the number of iterations and sample size
iter <- 100
n <- 5

# Initialize a vector to store the means
means <- rep(NA_real_, iter)

# Perform the iterations: draw n petal lengths, then record their mean
for (i in seq_len(iter)) {
  petal_sample <- sample(iris_df$Petal.Length, n)
  means[i] <- mean(petal_sample)
}

# Create a histogram of the means
hist(means)

OUTPUT:
> # Load the necessary dataset and view its structure
> str(iris)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1
1 1 1 1 ...
>
> # Convert the iris dataset to a data frame
> iris_df <- data.frame(iris)
> View(iris_df)
>
> # Set the number of iterations and sample size
> iter <- 100
> n <- 5
>
> # Initialize a vector to store the means
> means <- rep(NA, iter)
>
> # Perform the iterations
> for (i in 1:iter) {
+ mean_of_each_sample <- sample(iris$Petal.Length, n)
+ means[i] <- mean(mean_of_each_sample)
+ }
>
> # Create a histogram of the means
> hist(means)

14. Demonstrate the following queries on the data sets of BSc A and B section results.
library("xlsx")

# Load datasets (first sheet of each workbook).
# Each path must stay on one line: a line break inside the quotes becomes
# part of the file name and the file is then not found.
bsc_a <- read.xlsx(
  "C:/Users/MOIN PASHA/OneDrive/Documents/R programming/BSc 1.xlsx",
  sheetIndex = 1
)
bsc_b <- read.xlsx(
  "C:/Users/MOIN PASHA/OneDrive/Documents/R programming/BSc 2.xlsx",
  sheetIndex = 1
)

# Print column names and number of rows
print("Columns in BSc A dataset:")
print(names(bsc_a))

print("Columns in BSc B dataset:")
print(names(bsc_b))

print(paste("Number of rows in BSc A dataset:", nrow(bsc_a)))
print(paste("Number of rows in BSc B dataset:", nrow(bsc_b)))

# Analysis on BSc B dataset: count R.Programming marks below 40 and above 90.
# Summing a logical vector counts its TRUE values; na.rm = TRUE skips
# missing marks.
students_below_40_b <- sum(bsc_b$R.Programming < 40, na.rm = TRUE)
students_greater_90_b <- sum(bsc_b$R.Programming > 90, na.rm = TRUE)

print(paste(
  "Number of students below 40 in R.Programming in BSc B dataset:",
  students_below_40_b
))
print(paste(
  "Number of students above 90 in R.Programming in BSc B dataset:",
  students_greater_90_b
))

# Summary statistics
#
# column: vector of values to summarise
# Returns a list with elements mean, median, sd and mode. Missing values
# are dropped for mean, median and sd; mode comes from get_mode().
summary_stats <- function(column) {
  list(
    mean = mean(column, na.rm = TRUE),
    median = median(column, na.rm = TRUE),
    sd = sd(column, na.rm = TRUE),
    mode = get_mode(column)
  )
}

# Most frequent value of a vector.
#
# v: vector of values
# Returns the value of `v` that occurs most often; when several values
# share the highest count, the one that appears first in `v` is returned.
get_mode <- function(v) {
  distinct_values <- unique(v)
  # Position of every element of v within the distinct values
  positions <- match(v, distinct_values)
  # How many times each distinct value occurs
  frequencies <- tabulate(positions)
  distinct_values[which.max(frequencies)]
}
# Subject columns to analyse. Each name must stay on one line: a line break
# inside the quotes would become part of the column name.
relevant_columns <- c(
  "R.Programming", "Web.Development", "Optical.Fiber", "IoT.and.Robotics"
)
stats_results <- lapply(bsc_b[relevant_columns], summary_stats)

# Print the summary statistics of every subject
for (i in seq_along(relevant_columns)) {
  print(paste("Summary statistics for", relevant_columns[i], ":"))
  print(paste("Mean:", stats_results[[i]]$mean))
  print(paste("Median:", stats_results[[i]]$median))
  print(paste("Standard deviation:", stats_results[[i]]$sd))
  print(paste("Mode:", stats_results[[i]]$mode))
}

# Multilinear Regression: model Avg from the four subject columns
# (the formula Avg ~ . uses every other column of input_b as a predictor)
input_b <- bsc_b[, c(relevant_columns, "Avg")]
model_b <- lm(Avg ~ ., data = input_b)

print(coef(model_b))

# Prediction and inference for one student's marks
moin_data <- data.frame(
  R.Programming = 78,
  Web.Development = 106,
  Optical.Fiber = 71,
  IoT.and.Robotics = 93
)
predicted_avg_moin <- predict(model_b, newdata = moin_data)
actual_avg_moin <- 89 # assuming the actual average score
predictive_error_moin <- actual_avg_moin - predicted_avg_moin

print(predicted_avg_moin)
print(predictive_error_moin)

Step 1: check your system has BSc A and B section excel sheet

Step 2: Enter the whole program as given in lab manual


Step 3: Check whether the xlsx package is available in RStudio. If it is
not, click Tools, then Install Packages, and type xlsx in the
Packages field.

Step 4: after typing xlsx then click on install

Step 5: check the package is installed in your system or not


Step 6 : copy the path of excel sheet for BSc 1 and 2

Step 7: copy and paste path in your program

Step 8: The copied path uses backslashes (\); change each one to a forward slash (/),
as seen in the above photo.

Step 9: Run the program


Output:
> library("xlsx")
>
> # Load datasets
> bsc_a <- read.xlsx("C:/Users/MOIN PASHA/OneDrive/Documents/R programming
/BSc 1.xlsx", sheetIndex = 1)
> bsc_b <- read.xlsx("C:/Users/MOIN PASHA/OneDrive/Documents/R programming
/BSc 2.xlsx", sheetIndex = 1)
>
> # Print column names and number of rows
> print("Columns in BSc A dataset:")
[1] "Columns in BSc A dataset:"
> print(names(bsc_a))
[1] "Name" "Age" "Gender"
[4] "University" "Graduation.Year" "Current.Job.Title"
[7] "Company" "Email" "Subject"
[10] "Subject.Marks"
>
> print("Columns in BSc B dataset:")
[1] "Columns in BSc B dataset:"
> print(names(bsc_b))
[1] "BSc.Course" "Reg.No" "Name"
[4] "R.Programming" "Web.Development" "Optical.Fiber"
[7] "IoT.and.Robotics" "Avg"
>
> print(paste("Number of rows in BSc A dataset:", nrow(bsc_a)))
[1] "Number of rows in BSc A dataset: 41"
> print(paste("Number of rows in BSc B dataset:", nrow(bsc_b)))
[1] "Number of rows in BSc B dataset: 30"
>
> # Analysis on BSc B dataset
> students_below_40_b <- sum(bsc_b$R.Programming < 40, na.rm = TRUE)
> students_greater_90_b <- sum(bsc_b$R.Programming > 90, na.rm = TRUE)
>
> print(paste("Number of students below 40 in R.Programming in BSc B datas
et:", students_below_40_b))
[1] "Number of students below 40 in R.Programming in BSc B dataset: 0"
> print(paste("Number of students above 90 in R.Programming in BSc B datas
et:", students_greater_90_b))
[1] "Number of students above 90 in R.Programming in BSc B dataset: 19"
>
> # Summary statistics
> summary_stats <- function(column) {
+ mean_val <- mean(column, na.rm = TRUE)
+ median_val <- median(column, na.rm = TRUE)
+ sd_val <- sd(column, na.rm = TRUE)
+ mode_val <- get_mode(column)
+
+ return(list(mean = mean_val, median = median_val, sd = sd_val, mode =
mode_val))
+ }
>
> get_mode <- function(v) {
+ uniqv <- unique(v)
+ uniqv[which.max(tabulate(match(v, uniqv)))]
+ }
>
> relevant_columns <- c("R.Programming", "Web.Development", "Optical.Fiber
", "IoT.and.Robotics")
> stats_results <- lapply(bsc_b[relevant_columns], summary_stats)
>
> for (i in seq_along(relevant_columns)) {
+ print(paste("Summary statistics for", relevant_columns[i], ":"))
+ print(paste("Mean:", stats_results[[i]]$mean))
+ print(paste("Median:", stats_results[[i]]$median))
+ print(paste("Standard deviation:", stats_results[[i]]$sd))
+ print(paste("Mode:", stats_results[[i]]$mode))
+ }
[1] "Summary statistics for R.Programming :"
[1] "Mean: 102.266666666667"
[1] "Median: 104"
[1] "Standard deviation: 24.3224507234654"
[1] "Mode: 81"
[1] "Summary statistics for Web.Development :"
[1] "Mean: 114.8"
[1] "Median: 115.5"
[1] "Standard deviation: 18.888785214951"
[1] "Mode: 115"
[1] "Summary statistics for Optical.Fiber :"
[1] "Mean: 72.5"
[1] "Median: 72.5"
[1] "Standard deviation: 13.3719985352821"
[1] "Mode: 70"
[1] "Summary statistics for IoT.and.Robotics :"
[1] "Mean: 74.2666666666667"
[1] "Median: 77"
[1] "Standard deviation: 11.5726590194998"
[1] "Mode: 84"
>
> # Multilinear Regression
> input_b <- bsc_b[, c(relevant_columns, "Avg")]
> model_b <- lm(Avg ~ ., data = input_b)
>
> print(coef(model_b))
(Intercept) R.Programming Web.Development Optical.Fiber
10.0844831 0.2224124 0.2270819 0.2000564
IoT.and.Robotics
0.1932278
>
> # Prediction and inference
> moin_data <- data.frame(R.Programming = 78, Web.Development = 106, Optic
al.Fiber = 71, IoT.and.Robotics = 93)
> predicted_avg_moin <- predict(model_b, newdata = moin_data)
> actual_avg_moin <- 89 # assuming the actual average score
> predictive_error_moin <- actual_avg_moin - predicted_avg_moin
>
> print(predicted_avg_moin)
1
83.67752
> print(predictive_error_moin)
1
5.322478
15. Demonstrate the Z-test

# Observations for the two groups being compared
group1_data <- c(22, 25, 28, 30, 32)
group2_data <- c(18, 20, 24, 26, 28)

# Significance level used for the test decision
alpha <- 0.05

# Two-sample Z-test function
#
# data1, data2: numeric vectors holding the two samples
# Returns a list with
#   z_score: difference of the sample means divided by its standard error,
#            computed from the sample standard deviations and sample sizes
#   p_value: two-tailed p-value of z_score under the standard normal
two_sample_z_test <- function(data1, data2) {
  standard_error <- sqrt(
    (sd(data1)^2 / length(data1)) + (sd(data2)^2 / length(data2))
  )
  z <- (mean(data1) - mean(data2)) / standard_error

  list(
    z_score = z,
    # Twice the lower-tail area below -|z| gives the two-tailed probability
    p_value = 2 * pnorm(-abs(z))
  )
}

# Perform the two-sample Z-test
result_two_sample <- two_sample_z_test(group1_data, group2_data)

# Display the result
cat("Z-Score:", result_two_sample$z_score, "\n")
cat("P-Value:", result_two_sample$p_value, "\n")

# Make a decision based on the p-value.
# Each message is passed to cat() as two pieces, which cat() joins with a
# single space, so no line break ends up inside the printed sentence.
if (result_two_sample$p_value < alpha) {
  cat(
    "Reject the null hypothesis.",
    "There is a significant difference between the two groups.\n"
  )
} else {
  cat(
    "Fail to reject the null hypothesis.",
    "There is no significant difference between the two groups.\n"
  )
}

OUTPUT:
> # Sample data for two groups
> group1_data <- c(22, 25, 28, 30, 32)
> group2_data <- c(18, 20, 24, 26, 28)
>
> # Parameters
> alpha <- 0.05 # Significance level
>
> # Two-sample Z-test function
> two_sample_z_test <- function(data1, data2) {
+ mean1 <- mean(data1)
+ mean2 <- mean(data2)
+ sd1 <- sd(data1)
+ sd2 <- sd(data2)
+ n1 <- length(data1)
+ n2 <- length(data2)
+ z_score <- (mean1 - mean2) / sqrt((sd1^2 / n1) + (sd2^2 / n2))
+ p_value <- 2 * pnorm(-abs(z_score)) # Two-tailed test
+ result <- list(
+ z_score = z_score,
+ p_value = p_value
+ )
+ return(result)
+ }
>
> # Perform the two-sample Z-test
> result_two_sample <- two_sample_z_test(group1_data, group2_data)
>
> # Display the result
> cat("Z-Score:", result_two_sample$z_score, "\n")
Z-Score: 1.634848
> cat("P-Value:", result_two_sample$p_value, "\n")
P-Value: 0.102081
>
> # Make a decision based on the p-value
> if (result_two_sample$p_value < alpha) {
+ cat("Reject the null hypothesis. There is a significant difference bet
ween the two groups.\n")
+ } else {
+ cat("Fail to reject the null hypothesis. There is no significant diffe
rence between the two groups.\n")
+ }
Fail to reject the null hypothesis. There is no significant difference bet
ween the two groups.

You might also like