0% found this document useful (0 votes)

14 views9 pages

R Code

The document provides a comprehensive guide on data visualization techniques using R, including importing data, exploring quantitative and qualitative data, and creating various plots such as histograms, scatterplots, and bar charts. It also covers descriptive statistics, data subsetting, and Poisson distribution analysis. Additionally, it includes exercises and examples for practical application of the concepts discussed.

Uploaded by

harrypoter

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

14 views9 pages

R Code

Uploaded by

harrypoter

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 9

########################################################################

# Data Visualization - 01
########################################################################

# NOTE: run this script in a fresh R session rather than wiping the workspace
# with rm(list = ls()); that call deletes every object in the global
# environment (including unrelated work) and does not detach packages.

# Importing data csv files

# Dataset source:
# https://www.kaggle.com/datasets/spscientist/students-performance-in-exams?resource=download

# Insert the address of "Data.csv" file

help("read.csv")

# data <- read.csv("C:/Users/DIM/Downloads/Data.csv")

# Location of the CSV file; edit this to point at your own copy of Data.csv.
# The path is assembled from two pieces only to keep the lines short.
data_path <- paste0(
  "F:/Msc(Kealniya)/1st Semi/Statistics_for_Data_Science _MDAN 51163/",
  "Lecturer 01/Data.csv"
)

# Read the file once and keep the result; calling read.csv() without an
# assignment only prints the whole data frame to the console.
data <- read.csv(data_path)

####### Data exploration

# First rows of the data frame
head(data)

# Data structure: column names, types and a preview of the values
str(data)

########################################################################
# Quantitative data
########################################################################

# Print the raw math scores
data$math_score

# Plot the histogram of math_score

help("hist")
hist(data$math_score)
hist(data$reading_score)

# Same histogram with an explicit title and axis labels
hist(data$math_score,
     main = "Histogram of Math Score",
     xlab = "Math Score", ylab = "Frequency")

# Frequency polygon of the math_score

# install.packages("ggplot2")
library(ggplot2)

help(ggplot)

# Inside aes() refer to columns by bare name: ggplot looks them up in the
# data frame passed as `data`, so `data$math_score` is unnecessary and is
# discouraged by ggplot2.
ggplot(data, aes(math_score)) +
  geom_freqpoly(bins = 10)

# Density plot of math_score
density(data$math_score)

plot(density(data$math_score), main = "Density of Math Score",
     xlab = "Math Score", ylab = "Density")

# Scatterplot of math_score vs writing scores

plot(data$math_score, data$writing_score,
     main = "Scatterplot",
     xlab = "Math score", ylab = "Writing score", pch = 19)

# Line plot
# Join the points in ascending math_score order; joining them in row order
# would draw a line that zig-zags back and forth across the plot.
score_order <- order(data$math_score)
plot(data$math_score[score_order], data$writing_score[score_order],
     type = "o", main = "Line plot",
     xlab = "Math score", ylab = "Writing score", pch = 19)

help(grid)

########################################################################
# Qualitative data
########################################################################

# Extract group data

Race_Group <- data$race
# Frequency table of group data
Group <- table(Race_Group)

# Pie chart
help(pie)
pie(Group, main = "Pie Chart")

# Basic barplot
help("ggplot")
# ggplot() expects a data frame plus aesthetics and a geom layer; a bare
# table is not a data frame, and without aes()/geom nothing would be drawn.
# as.data.frame() on this one-way table gives the columns Race_Group (the
# level) and Freq (its count).
ggplot(data = as.data.frame(Group), aes(x = Race_Group, y = Freq)) +
  geom_bar(stat = "identity")

# Constructing a data frame

help("data.frame")

table(Race_Group)

group_label <- c("A", "B", "C", "D", "E") # Group names

# Count of each group.
# NOTE(review): presumably transcribed by hand from the table(Race_Group)
# output above; re-check these numbers if Data.csv changes.
count <- c(89, 190, 319, 262, 140)

group_count <- data.frame(group_label, count)

# Basic barplot: stat = "identity" uses the supplied counts as bar heights
ggplot(data = group_count, aes(x = group_label, y = count)) +
  geom_bar(stat = "identity")

help("geom_bar")

# Pareto Chart

# install.packages("qcc")
library(qcc)

pareto.chart(count)

# Keep only the gender and race columns

x <- data[c("gender", "race")]

# Two-way frequency table: gender by race
table(x)

# Long-format data frame holding one frequency per (group, gender) pair
grp <- rep(LETTERS[1:5], times = 2)
gen <- rep(c("F", "M"), each = 5)
fre <- c(
  36, 104, 180, 129, 69, # rows labelled "F", groups A to E
  53, 86, 139, 133, 71   # rows labelled "M", groups A to E
)

df <- data.frame(grp = grp, gen = gen, fre = fre)

# Multiple barplot

# Bars filled by gender; with the default position the two genders are
# stacked within each group
gender_by_group <- ggplot(data = df, aes(x = grp, y = fre, fill = gen))
gender_by_group + geom_bar(stat = "identity")

# position_dodge() places the bars of each group side by side instead
gender_by_group + geom_bar(stat = "identity", position = position_dodge())

# Sorting data based on the ascending order of math_score

data_sorted <- data[order(data$math_score), ]
# Sorting data based on the descending order of math_score
data_sorted_descending <- data[order(data$math_score, decreasing = TRUE), ]
# Legacy alias: earlier versions stored the DESCENDING sort under this
# misleading name. Kept only so existing references keep working.
data_sorted_ascending <- data_sorted_descending

#########################################################################
######

# visualizing multivariate data:

# "women" dataset is available in R:
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/women.html
help(women)
# Keep the built-in data set under its own name. Assigning it to `data`
# would overwrite the students data frame, which the subsetting code later
# in this script still needs (it filters on gender and math_score).
women_data <- women

h <- women_data$height
w <- women_data$weight

# Scatterplot
plot(women_data, xlab = "Height (in)", ylab = "Weight (lb)",
     main = "women data: American women aged 30-39")
grid(nx = 10, ny = 10)

# Line plot: type = "o" draws the points and the connecting line
plot(h, w, type = "o", main = "Line plot",
     xlab = "Height", ylab = "weight", pch = 19)
help(grid)
#########################################################################
######

## Group Activity: Perform a descriptive analysis for your dataset
## and interpret your results.

# Group/Room  Dataset     Data Description
# 1           airquality  https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/airquality.html
# 2           attenu      https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/attenu.html
# 3           freeny      https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/freeny.html
# 4           iris        https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html
# 5           quakes      https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/quakes.html
# 6           rock        https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/rock.html
# 7           stackloss   https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/stackloss.html
# 8           swiss       https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/swiss.html

# Check the datasets available in R:
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html

#########################################################################
######

## Subsetting data
# This section filters on the columns gender, math_score, reading_score and
# writing_score, so `data` must be the students data frame here.

# Extract data between entry 30 and entry 55 for the first 3 variables
data_subset1 <- data[30:55, 1:3]
# Extract data between entry 30 and entry 55 for the first, second, and
# fifth variables
data_subset1_1 <- data[30:55, c(1, 2, 5)]

# Extract data of the male students:

help("subset")
data_subset2 <- subset(data, gender == "male")

# Extract data of the male students who scored more than 75 for
# mathematics:
data_subset3 <- subset(data, gender == "male" & math_score > 75)

# Extract data of the male students or students who scored more than 75
# for mathematics:
data_subset3_1 <- subset(data, gender == "male" | math_score > 75)
# The "or" condition keeps a row in each of these scenarios:
# scenario 1: gender == "male"
# scenario 2: math_score > 75
# scenario 3: gender == "male" and math_score > 75

# Extract scores of the male students who scored more than 75 for
# mathematics:
data_subset4 <- subset(data, gender == "male" & math_score > 75,
                       select = c(math_score, reading_score, writing_score))

# Alternative method: start from data_subset3 (computed above) and keep only
# the score columns.
# data_subset4_1 <- subset(data_subset3, select = c(math_score,
#                          reading_score, writing_score))
# Select the score columns by name rather than by position (2:4), so the
# result matches data_subset4 wherever those columns sit in the file.
score_columns <- c("math_score", "reading_score", "writing_score")
data_subset4_2 <- data_subset3[, score_columns]

# To get a random sample from the whole data set

set.seed(123) # Fix the seed so the same random sample is drawn on every run
# Randomly select about 40% of the rows: each row is kept independently with
# probability 0.4, so the sample size varies around 40% of nrow(data).
# (`DATA` is not defined at this point in the script; the data frame being
# sampled is `data`.)
sample_40 <- sample(c(TRUE, FALSE), nrow(data), replace = TRUE,
                    prob = c(0.4, 0.6))
SAMPLE <- data[sample_40, ]

########################################################################
# Descriptive Statistics 02
########################################################################

# Univariate sample used throughout this section

data <- c(
  4, 10, 5, 8, 7.5, 8, 5, 16.5, 1, 7.8,
  8, 10, 11, 18, 15, 9, 14, 23, 21, 28
)

# Arithmetic mean
help(mean)
mean(data)
Mean_data <- mean(data)

# Median (stored under both names, med and MED)
help(median)
median(data)
med <- median(data)
MED <- med

# Mode
# Read it off the frequency table: the value(s) with the highest count
table(data)

# Range
max(data) # Maximum
min(data) # Minimum
# Range = largest value - smallest value
Range <- diff(range(data))

# Standard deviation
help(sd)
sd(data)
# Variance
var(data)

# Coefficient of Variation
# Expressed in percent: std dev / mean * 100

cv <- sd(data) / mean(data) * 100

# Quartiles and Inter Quartile Range

help("quantile")
Q1 <- quantile(data, probs = 0.25)

quantile(data, probs = c(0.25, 0.75))

IQR(data)

# summary(): minimum, quartiles, median, mean and maximum
summary(data)

# Boxplot
help("boxplot")
boxplot(data)

# Histogram (the same sample is entered again before plotting)
data <- c(
  4, 10, 5, 8, 7.5, 8, 5, 16.5, 1, 7.8,
  8, 10, 11, 18, 15, 9, 14, 23, 21, 28
)
hist(data)
help(hist)

# Density plot: print the kernel density estimate, then draw it
density(data)
plot(density(data))

# Skewness & kurtosis
# skewness() and kurtosis() are called right after attaching the moments
# package, so they are presumably provided by it; run the install line
# below once if library(moments) fails.
# NOTE(review): `data` is whatever object currently holds that name; this
# section appears to expect the numeric sample defined just above - confirm.

#install.packages("moments")
library(moments)

# Open the documentation for skewness()
help(skewness)
# Print the sample skewness and kurtosis of `data`.
# NOTE(review): check help(kurtosis) for which kurtosis convention (raw or
# excess) the reported value follows before interpreting it.
skewness(data)
kurtosis(data)

## Data set with missing value

# Numeric vector whose last element is missing (NA)
x <- c(12, 7, 3, 4.2, 18, 2, 54, -21, 8, -5, NA)

# Mean with the NA left in place: the NA propagates, so the result is NA

help(mean)
mean_WithMissing <- mean(x)
print(mean_WithMissing)

# Mean of the observed values only (NA entries are dropped first)

mean_WithOutMissing <- mean(x[!is.na(x)])
print(mean_WithOutMissing)

#########################################################################
######
## Perform a descriptive analysis for the "iris" dataset and interpret
## your results.

# iris: https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/iris.html

help(iris)

# Working copy of the built-in iris data frame for the exercise
DATA <- iris

########################################################################
# Poisson Distribution 04

# Content: Density, distribution function, and random generation

# for the Poisson distribution with parameter lambda(=mu).

# Density function: dpois(x, lambda, log = FALSE)

# Distribution function (cumulative probability): ppois(q, lambda,
# lower.tail = TRUE, log.p = FALSE)
# Random Generation: rpois(n, lambda)

########################################################################

# NOTE: start this section in a fresh R session instead of calling
# rm(list = ls()), which silently deletes every object in the workspace.

# Example:

# RDA investigated that there are twelve cars crossing a bridge per
# minute on average.

# (a) Find the probability of having
#   (i) no cars
#   (ii) three or more cars
#   (iii) less than 17 cars
# crossing the bridge in a particular minute.

# (b) Plot the probability distribution of No of cars crossing the bridge.

# X = The number of cars crossing the bridge in a particular minute
# x = 0, 1, 2, 3,.....
# Poisson distribution with parameter lambda(=mu).
# X follows a Poisson(lambda = 12) distribution

help(dpois)

lambda <- 12 # average number of cars crossing per minute

# (i) no cars: P(X = 0)
x <- 0
P_X_0 <- dpois(x, lambda = lambda)
P_X_0

sprintf("P(X = 0) = %s", round(P_X_0, digits = 6))

sprintf("The probability of no cars crossing the bridge in a minute is %s",
        round(P_X_0, digits = 6))

# (ii) three or more cars: P(X >= 3) = 1 - P(X <= 2)
P_X_geq_3 <- 1 - ppois(2, lambda = lambda) # complement of the lower tail
P_X_geq_3

# The same probability taken directly from the upper tail.
# ppois(q, lower.tail = FALSE) returns P(X > q), so q must be 2 (not 3) for
# the outcome X = 3 to be included.
P_X_GEQ_3 <- ppois(2, lambda = lambda, lower.tail = FALSE)
P_X_GEQ_3
sprintf("P(X >= 3) = %s", round(P_X_GEQ_3, digits = 4))

# (iii) less than 17 cars: P(X < 17) = P(X <= 16)
# X is a count, so "less than 17" stops at 16; ppois(17, ...) would wrongly
# include the outcome X = 17.
P_X_lt_17 <- ppois(16, lambda = lambda) # lower tail
P_X_lt_17
sprintf("P(X < 17) = %s", round(P_X_lt_17, digits = 4))

# (b) The Poisson probability distribution plot

x <- 0:20

# Stack the two charts vertically. par() returns the previous setting, which
# is kept so the layout can be put back once both charts are drawn.
old_par <- par(mfrow = c(2, 1))

# Probability function P(X = x), labelled "pdf" on the chart

barplot(dpois(x, lambda = 12), col = "red", names.arg = x,
        xlab = "X = No of cars crossing the bridge",
        ylab = "pdf: P(X = x)",
        main = "Poisson (mu = 12) pdf")

# Cumulative distribution function (cdf): P(X <= x)

barplot(ppois(x, lambda = 12), col = "red", names.arg = x,
        xlab = "X = No of cars crossing the bridge",
        ylab = "cdf:P(X <= x)",
        main = "Poisson (mu = 12) cdf")

par(old_par) # restore the previous plot layout

########################################################################
# Extra: Random generation for a Poisson distribution with parameter
# lambda(=mu).

# rpois(n, lambda)

# Create a data set of 30 samples from a Poisson distribution with
# lambda = 6.23
set.seed(2) # fix the seed so the same sample is drawn on every run
# Store the sample so it can be reused, then print it
poisson_sample <- rpois(n = 30, lambda = 6.23)
poisson_sample

########################################################################
# Exercise:

# The number of accidents that occur at a busy intersection is Poisson
# distributed with a mean of 3.5 per week. Find the probability of the
# following events:
# (a) Less than three accidents in a week
# (b) Five or more accidents in a week
# (c) No accidents today

Elevator Controler Part 4 TK
100% (6)
Elevator Controler Part 4 TK
22 pages
Hustler Mini Z 44/52 Parts Manual
0% (1)
Hustler Mini Z 44/52 Parts Manual
125 pages
Computer Applications Arihant
100% (1)
Computer Applications Arihant
13 pages
02 The Intelligence Cycle
No ratings yet
02 The Intelligence Cycle
28 pages
Influence of Raw Materials Characteristics On Pyroprocessing
No ratings yet
Influence of Raw Materials Characteristics On Pyroprocessing
19 pages
BUSI 472 - Business Etiquette PowerPoint
100% (2)
BUSI 472 - Business Etiquette PowerPoint
13 pages
Company Profile PT Gita Guna Utama
No ratings yet
Company Profile PT Gita Guna Utama
53 pages
Etabs V18 Course Content
No ratings yet
Etabs V18 Course Content
9 pages
Introduction To Keyboarding: Using Good Technique
No ratings yet
Introduction To Keyboarding: Using Good Technique
17 pages
ASCOMETAL Steel Grades EN
No ratings yet
ASCOMETAL Steel Grades EN
8 pages
R For Data Exploration
No ratings yet
R For Data Exploration
52 pages
SML Practical 1to11
No ratings yet
SML Practical 1to11
23 pages
Pertemuan 2 Strategi Operasi Dalam Lingkungan Global
No ratings yet
Pertemuan 2 Strategi Operasi Dalam Lingkungan Global
48 pages
Lecture 10 R
No ratings yet
Lecture 10 R
117 pages
Stastistics and Probability With R Programming Language: Lab Report
50% (2)
Stastistics and Probability With R Programming Language: Lab Report
44 pages
Spatial Statistics in R
No ratings yet
Spatial Statistics in R
29 pages
R Record-1
No ratings yet
R Record-1
57 pages
R File Code
No ratings yet
R File Code
16 pages
TEB2043 Introduction To Data Science: Descriptive Analytics & Visualization DR Shuhaida Mohamed Shuhidan JAN 2025
No ratings yet
TEB2043 Introduction To Data Science: Descriptive Analytics & Visualization DR Shuhaida Mohamed Shuhidan JAN 2025
29 pages
List of Programs in R 2 Sem
No ratings yet
List of Programs in R 2 Sem
48 pages
Data Visualization
No ratings yet
Data Visualization
46 pages
Unit - 2: Data Manipulation With R & Data Visualization in Watson Studio
No ratings yet
Unit - 2: Data Manipulation With R & Data Visualization in Watson Studio
58 pages
Lab File AD PDF
No ratings yet
Lab File AD PDF
25 pages
R Note
No ratings yet
R Note
56 pages
R Program
No ratings yet
R Program
22 pages
Unit3 R
No ratings yet
Unit3 R
19 pages
R Lab Manual
No ratings yet
R Lab Manual
31 pages
R Complete
No ratings yet
R Complete
24 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
Cost Practical
No ratings yet
Cost Practical
13 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
R Code
No ratings yet
R Code
13 pages
RSTUDIO
No ratings yet
RSTUDIO
44 pages
R Commands
No ratings yet
R Commands
18 pages
Practical 1 EDA
No ratings yet
Practical 1 EDA
14 pages
R Console
No ratings yet
R Console
6 pages
Applied Statistics MAT1011
No ratings yet
Applied Statistics MAT1011
22 pages
IntroR 2
No ratings yet
IntroR 2
18 pages
X - 15 x-1 2. Print ('Hello Word!') ## (1) "Hello Word!" 3. X - 4 y - 5 Z - X+y Print (Z) 4. X - 4 y - 5 Cat ('The Sum of X and y Is', X+y)
No ratings yet
X - 15 x-1 2. Print ('Hello Word!') ## (1) "Hello Word!" 3. X - 4 y - 5 Z - X+y Print (Z) 4. X - 4 y - 5 Cat ('The Sum of X and y Is', X+y)
15 pages
Copy Entire Document Content in R Studio: R Script Compiled by Mr. Anup Sharma (Strictly To Be Used As Class Notes)
No ratings yet
Copy Entire Document Content in R Studio: R Script Compiled by Mr. Anup Sharma (Strictly To Be Used As Class Notes)
15 pages
R
No ratings yet
R
6 pages
Copy Entire Document Content in R Studio
No ratings yet
Copy Entire Document Content in R Studio
17 pages
Analysis Using Statistical: Introduction & Data Exploration
No ratings yet
Analysis Using Statistical: Introduction & Data Exploration
23 pages
Donnees Mathematique
No ratings yet
Donnees Mathematique
9 pages
R Session - Note2 - Updated
No ratings yet
R Session - Note2 - Updated
7 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
CourseKata R Cheatsheet ABC
No ratings yet
CourseKata R Cheatsheet ABC
5 pages
UNIT 3 - Exploratory Graphs
No ratings yet
UNIT 3 - Exploratory Graphs
23 pages
2 R - Zajecia - 4 - Eng
No ratings yet
2 R - Zajecia - 4 - Eng
7 pages
Practice 7 QC Tool Part 1
No ratings yet
Practice 7 QC Tool Part 1
11 pages
R Training AM
No ratings yet
R Training AM
6 pages
Ayush Sonar 310104230868 Practical 3 DS R
No ratings yet
Ayush Sonar 310104230868 Practical 3 DS R
10 pages
R Cheatsheet ABC
No ratings yet
R Cheatsheet ABC
3 pages
Commands For Data Analysis Using R
No ratings yet
Commands For Data Analysis Using R
11 pages
R Course
No ratings yet
R Course
7 pages
Intro To R Software
No ratings yet
Intro To R Software
7 pages
R Pgms 30
No ratings yet
R Pgms 30
6 pages
BAN5
No ratings yet
BAN5
2 pages
Ds
No ratings yet
Ds
2 pages
R Cheatsheet ABC
No ratings yet
R Cheatsheet ABC
3 pages
R Cheatsheet ABCD
No ratings yet
R Cheatsheet ABCD
3 pages
R Functions
No ratings yet
R Functions
8 pages
Stainless Steel Razni Standardi
No ratings yet
Stainless Steel Razni Standardi
6 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
UL2
No ratings yet
UL2
2 pages
Lab Exercise 1
No ratings yet
Lab Exercise 1
16 pages
Basics: TH TH TH TH TH TH TH
No ratings yet
Basics: TH TH TH TH TH TH TH
3 pages
Abap Important All Data Infor.
No ratings yet
Abap Important All Data Infor.
17 pages
All Values in The First Column
No ratings yet
All Values in The First Column
7 pages
(Practical) Programming With R
No ratings yet
(Practical) Programming With R
5 pages
Erpnext Documentation
No ratings yet
Erpnext Documentation
8 pages
CTPM PIDs Review Clarifications For WQIFOD 26-30 Aug 2012
No ratings yet
CTPM PIDs Review Clarifications For WQIFOD 26-30 Aug 2012
6 pages
Upload Chi Phi Dien Quy 1-2021
No ratings yet
Upload Chi Phi Dien Quy 1-2021
138 pages
4090-9001 Supervised IAM Installation Manual Rev E PDF
No ratings yet
4090-9001 Supervised IAM Installation Manual Rev E PDF
2 pages
DQS251 - Piling - Spun Piles - Tutorial-Drwgs-Dec 2019 - pg11
No ratings yet
DQS251 - Piling - Spun Piles - Tutorial-Drwgs-Dec 2019 - pg11
8 pages
Complete Product Catalog With Line
No ratings yet
Complete Product Catalog With Line
29 pages
2 Unit 2 Python Library For Data Wrangling
No ratings yet
2 Unit 2 Python Library For Data Wrangling
37 pages
MAURER Guided Cross-Tie: Railway Expansion Joint
100% (1)
MAURER Guided Cross-Tie: Railway Expansion Joint
4 pages
Breathe Brick: International Research Journal of Engineering and Technology (IRJET)
No ratings yet
Breathe Brick: International Research Journal of Engineering and Technology (IRJET)
4 pages
Itc Reviewer 1
No ratings yet
Itc Reviewer 1
5 pages
Matlab Assignment
No ratings yet
Matlab Assignment
4 pages
Delegated Content Erasure in IPFS: Future Generation Computer Systems June 2020
No ratings yet
Delegated Content Erasure in IPFS: Future Generation Computer Systems June 2020
10 pages
Restricting Data and Sorting Data PDF
No ratings yet
Restricting Data and Sorting Data PDF
24 pages
Module 1-Discrete Structure
No ratings yet
Module 1-Discrete Structure
7 pages
Computer SSC-I Rubrics HA (19!05!2023)
No ratings yet
Computer SSC-I Rubrics HA (19!05!2023)
4 pages
How To Use SignalWire As An SMS Provider For GoHighLevel - Simple Steps
No ratings yet
How To Use SignalWire As An SMS Provider For GoHighLevel - Simple Steps
3 pages
Python For Beginners
From Everand
Python For Beginners
Célio Azevedo
No ratings yet
How to a Developers Guide to 4k: Developer edition, #3
From Everand
How to a Developers Guide to 4k: Developer edition, #3
Xinc Cyberwizard
No ratings yet
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet

R Code

Uploaded by

R Code

Uploaded by

########################################################################

# Importing data csv files

# Insert the address of "Data.csv" file

#data <- read.csv("C:/Users/DIM/Downloads/Data.csv")

read.csv("F:/Msc(Kealniya)/1st Semi/Statistics_for_Data_Science _MDAN

####### Data exploration

# Plot the histogram of math_score

# Frequency polygon of the math_score

ggplot(data, aes(math_score)) + geom_freqpoly(bins=10)

plot(density(data$math_score), main="Density of Math Score",

# Scatterplot of math_score vs writing scores

# Extract group data

# Constructing a data frame

group_label =c("A", "B", "C", "D", "E") # Group names

group_count = data.frame(group_label, count)

# Extracting gender and race data

grp = rep(c("A", "B", "C", "D", "E"),2)

df <- data.frame(grp, gen, fre)

# barplot with multiple groups

# Sorting data based on the ascending order of math_score

# visualizing multivariate data:

# "women" dataset is available in R: https://stat.ethz.ch/R-manual/R-

##Group Activity: Perform a descriptive analysis for your dataset

#Group/Room Dataset Data Description

# Check the datasets available in R: https://stat.ethz.ch/R-manual/R-

# Extract data of the male students:

# To get a random sample from the data whole data set

# Create a univariate dataset

cv <- sd(data) / mean(data) * 100

# Inter Quartile Range

quantile(data, c(.25, .75))

# Skewness & kurtosis

## Data set with missing value

# Find mean with missing data

# Find mean dropping missing data (NA entries)

DATA <- iris

# Content: Density, distribution function, and random generation

# Density function: dpois(x, lambda, log = FALSE)

rm(list=ls()) # Clear the Environment

#(a) Find the probability of having

#(b) Plot the probability distribution of No of cars crossing the bridge.

# X = The number of cars crossing the bridge in a particular minute

# Poisson distribution with parameter lambda(=mu).

#(i) no cars: X=0

sprintf("P(X = 0) = %s", round(P_X_0, digits = 6))

#(iii) less than 17 cars

# (b) The Poisson probability distribution plot

par(mfrow = c(2, 1))

# Probability density function (pdf)

# Cumulative density function (cdf)

#Create a data set of 30 samples from a Poisson distribution with lambda

# The number of accidents that occur at a busy intersection is Poisson

You might also like