Programming with R
Open RStudio -> File -> New File -> R Script
1. Assign 10 values to a variable. Find its length, sum and mean
# Store ten values in a vector, then report its length, sum and mean.
a <- c(1:9, 0)
length(a)
sum(a)
mean(a)
2. Use the sequence function to generate numbers in increments of 10.
# Generate 100, 110, ..., 2000 with seq(), stepping by 10.
seq(100, 2000, by = 10)
3. Install the package insuranceData
# Install the package "insuranceData" only when it is not already available,
# so re-running the script does not reinstall it every time.
if (!requireNamespace("insuranceData", quietly = TRUE)) {
  install.packages("insuranceData")
}
4. Load this package
# Loading the package "insuranceData"
library(insuranceData)
5. From this package, load the data SingaporeAuto
a. Hint: You may have to use the function data() or get(data())
# Bring the SingaporeAuto data set into the workspace
data(SingaporeAuto)
6. Find out the nature of the variables, check for NA’s, size and shape
# Inspect SingaporeAuto: variable types, missing values, size and shape.
# The working copy keeps the name `data` because the later steps refer to it.
data <- SingaporeAuto
# Structure: the type of every variable plus a preview of its values
str(data)
# Count the NA's per variable instead of printing the full logical matrix
# that is.na(data) returns (one TRUE/FALSE for every cell of the data frame)
colSums(is.na(data))
# Size and shape: c(rows, columns), then each dimension on its own
dim(data)
nrow(data)
ncol(data)
7. Extract the first 10 and the 40th to 70th cases, with the first 5 and the 11th, 13th and 15th variables
# Rows 1-10 and 40-70; columns 1-5 plus the 11th, 13th and 15th.
data[c(seq_len(10), seq(40, 70)), c(seq_len(5), 11, 13, 15)]
8. Create a new variable based on each of the following:
a. variable Exp_weights
i. If Exp_weights is negative, then the new variable is Low, else High
# dplyr supplies mutate(), case_when(), group_by(), summarise() and n()
library(dplyr)
# Label every row in a new EXP column: "LOW" when Exp_weights is negative,
# "HIGH" for everything else
data <- mutate(
  data,
  EXP = case_when(
    Exp_weights < 0 ~ "LOW",
    TRUE ~ "HIGH"
  )
)
# Number of rows carrying each EXP label
summarise(group_by(data, EXP), exp = n())
b. variable NCD
NCD value -> New variable
0 -> NIL
10 or 20 -> Level1
30 -> Level2
Anything else -> Level3
# dplyr supplies mutate(), case_when(), group_by(), summarise() and n()
library(dplyr)
# Derive the new column "N1" from NCD; case_when() uses the first condition
# that matches, and the final TRUE catches every remaining row
data <- mutate(
  data,
  N1 = case_when(
    NCD == 0 ~ "NIL",
    NCD %in% c(10, 20) ~ "LEVEL1", # either 10 or 20
    NCD == 30 ~ "LEVEL2",
    TRUE ~ "LEVEL3"
  )
)
# Number of rows in each "N1" level
summarise(group_by(data, N1), N1_count = n())
9. Create a subset that has only Clm_Count = 0 and has only numeric variables
# Sample data frame (for demonstration)
df <- data.frame(
  Clm_Count = c(0, 1, 0, 3, 0, 0),
  Age = c(25, 34, 29, 40, 22, 30),
  Salary = c(50000, 60000, 55000, 70000, 48000, 52000),
  Name = c("Alice", "Bob", "Charlie", "David", "Eve", "Frank")
)
# Step 1: Keep the rows where Clm_Count == 0. which() turns the comparison
# into row positions, so a missing Clm_Count drops the row instead of adding
# an all-NA row the way a bare logical index would.
df_filtered <- df[which(df$Clm_Count == 0), ]
# Step 2: Keep only the numeric columns. vapply() always returns one logical
# per column, whereas the type returned by sapply() depends on its input.
df_numeric <- df_filtered[vapply(df_filtered, is.numeric, logical(1))]
# Step 3: Print the resulting subset
print(df_numeric)
10. Find the counts of
a. SexInsured
b. VehicleType
c. SexInsured and VehicleType
d. VehicleType and SexInsured
e. Exp_weights < 0.75 and VehicleType
# Loading the package "insuranceData"
library(insuranceData)
# Load the data "SingaporeAuto"
data("SingaporeAuto")
# NOTE: the counts below are computed on `data`, the working copy created
# from SingaporeAuto earlier in this script; reloading SingaporeAuto above
# does not change `data`.
# Grouping by SexInsured
data %>%
  group_by(SexInsured) %>%
  summarise(SI = n())
# Grouping by VehicleType
data %>%
  group_by(VehicleType) %>%
  summarise(VT = n())
# Grouping by SexInsured and VehicleType. With two grouping variables,
# summarise() would otherwise keep the result grouped by the first one and
# print a message about it; .groups = "drop" returns an ungrouped table.
data %>%
  group_by(SexInsured, VehicleType) %>%
  summarise(SVT = n(), .groups = "drop")
# Grouping by VehicleType and SexInsured (reversed order):
data %>%
  group_by(VehicleType, SexInsured) %>%
  summarise(SVT = n(), .groups = "drop")
# Logical condition and VehicleType: flag the rows with Exp_weights < 0.75 in
# a new logical column, then count every flag/VehicleType combination.
data %>%
  mutate(ExpLessThan075 = Exp_weights < 0.75) %>%
  group_by(ExpLessThan075, VehicleType) %>%
  summarise(EXPVT = n(), .groups = "drop")
11. Draw a scatter plot of Exp_weights
# Install ggplot2 only when it is missing, so re-running the script does not
# reinstall it every time, then load it for visualisation
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Loading the package "insuranceData"
library(insuranceData)
# Load the data "SingaporeAuto"
data("SingaporeAuto")
# Create an index for rows. seq_len() stays empty for a zero-row data frame,
# whereas 1:nrow(data) would produce c(1, 0).
data <- data %>%
  mutate(INDEX = seq_len(nrow(data)))
# Scatter plot of Exp_weights against the row index
ggplot(data, aes(x = INDEX, y = Exp_weights)) +
  geom_point() +
  labs(
    title = "Scatter Plot of Exp_weights",
    x = "Index",
    y = "Exp_weights"
  ) +
  theme_minimal()