CourseKata R Cheatsheet ABC

This cheat sheet provides a comprehensive overview of statistical methods and data science techniques, including word equations, summary tables, simple statistics, probability distributions, simulations, model fitting, and visualizations. It includes R code snippets for various operations such as computing means, creating frequency tables, performing t-tests, and generating plots. The document serves as a quick reference for statistical analysis and data visualization in R.

Uploaded by

issy

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

18 views5 pages

CourseKata R Cheatsheet ABC

Uploaded by

issy

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 5

Statistics and Data Science I (ABC) CHEAT SHEET

https://bit.ly/r-cheatsheet-abc
Word Equations Summary Tables Simple Statistics
outcome = explanatory + other stuff # compute five-number summary mean(data_set$Y)
Y = X + other stuff favstats(~ Y, data = data_set) var(data_set$Y)
sd(data_set$Y)
# create frequency table
Basics tally(data_set$Y) cohensD(Y ~ X, data = data_set)
tally(~ Y, data = data_set) cor(Y ~ X, data = data_set)
print("Hello world!")
# tally by condition b1(Y ~ X, data = data_set)
# assign value to object b1(one_model)
tally(~ Y < 1900, data =
myNumber <- 5
data_set)
pre(Y ~ X, data = data_set)
# combine values into vector f(Y ~ X, data = data_set)
# two-way frequency table
myVector <- c(1, 2, 3)
tally(Y ~ X, data = data_set, margin =
TRUE, format = "proportion")
# first element in vector
myVector[1]
Data Frame # arrange rows by variable
arrange(data_set, Y)
# orders values or cases # structure of data frame # arrange rows by variable in descending order
sort(myVector) str(data_set) arrange(data_set, desc(Y))
# arithmetic operations
# view first/last six rows # creates data frame from csv file
sum(1, 2, 100), +, -, *, /
head(data_set) data_set <- read.csv("file_name", header =
sqrt(157)
abs(data_set$Y) tail(data_set) TRUE)

# logical operations # select multiple variables # convert quantitative variable

>, <, >=, <=, ==, !=, |, & select(data_set, Y1, Y2) # to categorical
factor(data_set$Y)
# results in a variable with values # first six rows of selected variables factor(data_set$Y, levels = c(1,2), labels
# of TRUE or FALSE head(select(data_set, Y1, Y2)) = c("A", "B"))
data_set$C <- data_set$A > data_set$B
# transform values
# select variable (a column) recode(data_set$Y, "0" = 0, "1" = 50, "2" =
data_set$Y 100)

# find rows that meet condition # creates two equal sized groups
data_set[data_set$Y > 40, ] ntile(data_set$Y, 2)
filter(data_set, Y > 300)
filter(data_set, Y != "NA") # convert categorical variable
# to quantitative
as.numeric(data_set$Y)
Probability Distribution
# CI using t distribution
# calculate the probability area confint(empty_model)
xpnorm(65.1, data_set$mean, data_set$sd)

# calculate a z-score # calculate p-value using F-distribution

zscore(data_set$Y) xpf(sample_F, df1 = 2 , df2 = 10)

# returns t at this probability

qt(.975, df = 999)
# returns F at this probability
qf(.95, df1 = 1, df2 = 100)

Page: 2 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

Simulation Fitting and Evaluating Models
# randomize sampling distribution
# sample without replacement # of b1s, centered on 0 # empty model
sample(data_set, 6) sdob1 <- do(1000) * empty_model <- lm(Y ~ NULL,
b1(shuffle(Y) ~ X, data = data_set) data = data_set)
# sample with replacement
resample(data_set, 10) # bootstrap sampling distribution of # use one explanatory variable
b1s, one_model <- lm(Y ~ X, data = data_set)
do(3) * resample (data_set, 10) # centered on sample b1
sdob1_boot <- do(1000) * # create a function from a formula
# mixes up values in a variable b1(Y ~ X, data = resample(data_set)) one_model_fun <- makeFun(one_model)
shuffle(data_set$Y)
one_model_fun(x_level_1)
# count the number of b1s at the upper
# simulate sampling 10000 Ys # and lower extreme
# model predictions and residuals
# from normal distribution tally(sdob1$b1 > sample_b1 | data_set$empty_predict <-
sim_Y <- rnorm(10000, Y_stats$mean, sdob1$b1 < -sample_b1) predict(empty_model)
Y_stats$sd) data_set$empty_resid <-
resid(empty_model)
# return TRUE for middle 95% of
# put simulated Ys into dataframe
data_set<- data.frame(sim_Y) distribution
# produce ANOVA table
middle(sdob1$b1, .95)
anova(empty_model)
# simulate sampling distribution of supernova(one_model)
means # randomize sampling distribution of
sim_SDoM <- do(10000) * mean(rnorm(157, PREs # t-test, using pooled variance
Y_stats$mean, Y_stats$sd)) sdoPRE <- do(1000) * PRE(shuffle(Y) ~ X,
data = data_set) t.test(Tip ~ Condition, data =
data_set, var.equal=TRUE)
# bootstrap sampling distribution of
# randomize sampling distribution of
means # pairwise comparison
bootSDoM <- do(10000) * Fs
sdoF <- do(1000) * # corrections: "Bonferroni",
mean(resample(data_set$Y, 157)) "Tukey" (default) or "none"
fVal(shuffle(Y) ~ X, data = data_set)
pairwise(one_model, correction =
# counts extreme Fs "none")
tally(~fVal > sample_F, data = sdoF)

Page: 3 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

Visualizations I
# faceted grid of histograms # density histogram
# histogram gf_histogram(~ Y, data = data_set) %>% gf_dhistogram(~ Y, data = data_set, fill
gf_histogram(~ Y, data = data_set) %>% gf_facet_grid(X ~ .) = "orange") %>%
# change labels gf_density()
gf_labs(title = "Graph Title", x
= "Y_Name", y = "Frequency")

# bar graph # boxplots # point plot

gf_bar( ~ Y, data = data_set) gf_boxplot(Y ~ X, data = data_set) gf_point(Y ~ X, data = data_set)

Page: 4 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

Visualizations II
# boxplot overlaid with jitter plot # sampling distribution of b1
# jitter plot gf_boxplot(Y ~ X, data = data_set, fill gf_histogram(~b1, data = sdob1, fill =
gf_jitter(Y ~ X, data = data_set) = "orange") %>% ~middle(b1, .95)) %>%
gf_jitter(height = 0, alpha = .2, size # modify the limits on x- and y-axes
= 3) gf_lims(x = c(-12, 12), y = c(0, 70))

# Add model # CI for pairwise comparisons

gf_point(Y ~ X, data = data_set) %>% pairwise(one_model, plot = TRUE)
# add model predictions as red points
gf_point(Y ~ X , shape = 1, size = 3,
color = "firebrick") %>%
# add best fitting model as a red line
gf_model(one_model, color = "red")

Page: 5 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

Sampe Generate and Plot
No ratings yet
Sampe Generate and Plot
14 pages
Module 3 R Data Science
No ratings yet
Module 3 R Data Science
158 pages
Lab Manual - DSR
No ratings yet
Lab Manual - DSR
32 pages
R语言学习笔记
No ratings yet
R语言学习笔记
78 pages
Commands For Data Analysis Using R
No ratings yet
Commands For Data Analysis Using R
11 pages
Week13 Slides Review
No ratings yet
Week13 Slides Review
23 pages
Ie2151 PS 3
No ratings yet
Ie2151 PS 3
15 pages
R Codes
No ratings yet
R Codes
5 pages
Probs Line of Code
No ratings yet
Probs Line of Code
2 pages
R Cheatsheet ABCD
No ratings yet
R Cheatsheet ABCD
4 pages
R Program
No ratings yet
R Program
22 pages
R Program Corrections
No ratings yet
R Program Corrections
20 pages
IBS Sample I
No ratings yet
IBS Sample I
10 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
STAT 1000 - Worksheet 2
No ratings yet
STAT 1000 - Worksheet 2
14 pages
R Code
No ratings yet
R Code
13 pages
R Complete
No ratings yet
R Complete
24 pages
R Training AM
No ratings yet
R Training AM
6 pages
Lab File AD PDF
No ratings yet
Lab File AD PDF
25 pages
R Cheatsheet ABCD
No ratings yet
R Cheatsheet ABCD
3 pages
R Cheatsheet ABCD
No ratings yet
R Cheatsheet ABCD
3 pages
R Cheatsheet ABC
No ratings yet
R Cheatsheet ABC
3 pages
R Code
No ratings yet
R Code
9 pages
Big Data Slip Solution
No ratings yet
Big Data Slip Solution
18 pages
COST - JournalPracticals (1-7)
No ratings yet
COST - JournalPracticals (1-7)
22 pages
R Programming Cheat Sheet
No ratings yet
R Programming Cheat Sheet
7 pages
R Tutorial
No ratings yet
R Tutorial
32 pages
Genetica Cuantitativa
No ratings yet
Genetica Cuantitativa
120 pages
R Cheatsheet ABC
No ratings yet
R Cheatsheet ABC
3 pages
Merge
No ratings yet
Merge
28 pages
Ali
No ratings yet
Ali
31 pages
Computer Interactive Statistics
No ratings yet
Computer Interactive Statistics
102 pages
10B MemoryManagement
No ratings yet
10B MemoryManagement
242 pages
R File Code
No ratings yet
R File Code
16 pages
BCA V SEM Advanced R Programming Lab Manual Final-1
No ratings yet
BCA V SEM Advanced R Programming Lab Manual Final-1
5 pages
R Console
No ratings yet
R Console
6 pages
Analysis Using Statistical: Introduction & Data Exploration
No ratings yet
Analysis Using Statistical: Introduction & Data Exploration
23 pages
Cost Practical
No ratings yet
Cost Practical
13 pages
Psych 115 Week 5 Discussion Slides - CK
No ratings yet
Psych 115 Week 5 Discussion Slides - CK
59 pages
STTN 225 R Summary
No ratings yet
STTN 225 R Summary
18 pages
Final Cost Practical
No ratings yet
Final Cost Practical
29 pages
OHC Motility-Ashmore Review08
No ratings yet
OHC Motility-Ashmore Review08
39 pages
Chapter 5
No ratings yet
Chapter 5
22 pages
UL2
No ratings yet
UL2
2 pages
Linear Model and Extensions Peng Ding Instant Download
No ratings yet
Linear Model and Extensions Peng Ding Instant Download
91 pages
R Intro 2011
No ratings yet
R Intro 2011
115 pages
Advanced R Data Analysis Training PDF
No ratings yet
Advanced R Data Analysis Training PDF
72 pages
Cochlear Amplification
No ratings yet
Cochlear Amplification
11 pages
Store 24 AB
0% (2)
Store 24 AB
15 pages
2 R - Zajecia - 4 - Eng
No ratings yet
2 R - Zajecia - 4 - Eng
7 pages
R Course
No ratings yet
R Course
7 pages
R Commands
No ratings yet
R Commands
5 pages
Get Introduction To Probability and Statistics For Engineers and Scientists, 6th Edition Sheldon M. Ross PDF Ebook With Full Chapters Now
No ratings yet
Get Introduction To Probability and Statistics For Engineers and Scientists, 6th Edition Sheldon M. Ross PDF Ebook With Full Chapters Now
40 pages
Statistics Cheat Sheet
100% (1)
Statistics Cheat Sheet
4 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
Rstudio Study Notes For PA 20181126
No ratings yet
Rstudio Study Notes For PA 20181126
6 pages
Assignment-2 ML Solution by Loknath Regmi
No ratings yet
Assignment-2 ML Solution by Loknath Regmi
6 pages
BAN5
No ratings yet
BAN5
2 pages
R Lab File Deepak
No ratings yet
R Lab File Deepak
27 pages
R Commands: Appendix B
No ratings yet
R Commands: Appendix B
5 pages
Lesson 3 - Statistics Refresher
No ratings yet
Lesson 3 - Statistics Refresher
56 pages
Fundamentals of Data Science NEP
No ratings yet
Fundamentals of Data Science NEP
2 pages
Luto Ni Bespren
No ratings yet
Luto Ni Bespren
3 pages
Partial Least Squares (PLS) Structural Equation Modeling (SEM) For Building and Testing Behavioral Causal Theory: When To Choose It and How To Use It
No ratings yet
Partial Least Squares (PLS) Structural Equation Modeling (SEM) For Building and Testing Behavioral Causal Theory: When To Choose It and How To Use It
24 pages
Sim R
No ratings yet
Sim R
6 pages
ANALYZING The Best Laptop
No ratings yet
ANALYZING The Best Laptop
12 pages
The Students' Engagement in Schools Questionnaire
No ratings yet
The Students' Engagement in Schools Questionnaire
13 pages
Stat 230 Introduction To Probability and Statistics: Sections 1.1 & 1.2
No ratings yet
Stat 230 Introduction To Probability and Statistics: Sections 1.1 & 1.2
17 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
11 pages
Advanced C Concepts and Programming: First Edition
From Everand
Advanced C Concepts and Programming: First Edition
Gayatri
3/5 (1)
IAC Lecture4 Homework
No ratings yet
IAC Lecture4 Homework
12 pages
STAT-2450 Assignment 1: Name:, Student ID: B00
No ratings yet
STAT-2450 Assignment 1: Name:, Student ID: B00
9 pages
Workshop Activity: X Seq y Length
No ratings yet
Workshop Activity: X Seq y Length
3 pages
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
2018 Sisk GrowthMindsetMetaAnalysis
No ratings yet
2018 Sisk GrowthMindsetMetaAnalysis
23 pages
The Essential R Reference
From Everand
The Essential R Reference
Mark Gardener
No ratings yet
Statistics and Probability Yong Hwa M. Jeong Grade 11 STEM-B Quarter 4 - Module 1: Test of Hypothesis
No ratings yet
Statistics and Probability Yong Hwa M. Jeong Grade 11 STEM-B Quarter 4 - Module 1: Test of Hypothesis
22 pages
RBasics Handout
No ratings yet
RBasics Handout
6 pages
For The Learners: Math 11 (Statistics and Probability)
No ratings yet
For The Learners: Math 11 (Statistics and Probability)
12 pages
Factors Affecting On Students Test Scores
No ratings yet
Factors Affecting On Students Test Scores
43 pages
Forecasting of Motorcycle Demand Using Calender Variations, Hybrid Calender Variations-ANN and Disagregation (Case Study in Jombang)
No ratings yet
Forecasting of Motorcycle Demand Using Calender Variations, Hybrid Calender Variations-ANN and Disagregation (Case Study in Jombang)
8 pages
Statistics
No ratings yet
Statistics
6 pages
Bumastics
No ratings yet
Bumastics
2 pages
IEStat2 - Laboratory 4 - de Jesus
No ratings yet
IEStat2 - Laboratory 4 - de Jesus
12 pages
Basics 1 Vario Gram
No ratings yet
Basics 1 Vario Gram
37 pages
Test1 Answers
No ratings yet
Test1 Answers
7 pages
KQ2 Depth Study PDF
No ratings yet
KQ2 Depth Study PDF
5 pages
Assignment 1 CLB20903 January 2020 PDF
No ratings yet
Assignment 1 CLB20903 January 2020 PDF
4 pages
Frekuensi Usia New
No ratings yet
Frekuensi Usia New
2 pages
Design
No ratings yet
Design
6 pages
Method Validation Calculation File of Assay
No ratings yet
Method Validation Calculation File of Assay
6 pages
Question Bank - Machine Learning
100% (1)
Question Bank - Machine Learning
4 pages
CT3 QP 0512 PDF
No ratings yet
CT3 QP 0512 PDF
6 pages
Statquest Multinomial Naive Bayes Study Guide V3-Mgywmv
No ratings yet
Statquest Multinomial Naive Bayes Study Guide V3-Mgywmv
8 pages
Statistical Calculations Using Calculators
No ratings yet
Statistical Calculations Using Calculators
4 pages
Key Quotes From The Play PDF
100% (2)
Key Quotes From The Play PDF
3 pages

CourseKata R Cheatsheet ABC

Uploaded by

CourseKata R Cheatsheet ABC

Uploaded by

Statistics and Data Science I (ABC) CHEAT SHEET

# logical operations # select multiple variables # convert quantitative variable

# calculate a z-score # calculate p-value using F-distribution

# returns t at this probability

Page: 2 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

Page: 3 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

# bar graph # boxplots # point plot

Page: 4 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

# Add model # CI for pairwise comparisons

Page: 5 ▷ Updated: 2023-04 ▷ Learn more about CourseKata @ https://coursekata.org

You might also like