#10-Visualization of Streaming Data and Class R Code - 10/03/2023
x <- -10:10
y <- -10:10
z <- sqrt(outer(x ^ 2, y ^ 2, "+"))
contour(x, y, z)
#=====
x <- -10:10
y <- -10:10
z <- sqrt(outer(x ^ 2, y ^ 2, "+"))
contour(x, y, z,
        nlevels = 20)
############################################
# Libraries
library(tidyverse)
#install.packages('hrbrthemes')
library(hrbrthemes)
library(viridis)
#install.packages('patchwork')
library(patchwork)
# Dataset:
a <- data.frame(x = rnorm(20000, 10, 1.2),
                y = rnorm(20000, 10, 1.2),
                group = rep("A", 20000))
b <- data.frame(x = rnorm(20000, 14.5, 1.2),
                y = rnorm(20000, 14.5, 1.2),
                group = rep("B", 20000))
c <- data.frame(x = rnorm(20000, 9.5, 1.5),
                y = rnorm(20000, 15.5, 1.5),
                group = rep("C", 20000))
data <- do.call(rbind, list(a,b,c))
View(data)
p1 <- data %>%
  ggplot(aes(x = x, y = y)) +
  geom_point(color = "#69b3a2", size = 2) +
  theme_ipsum() +
  theme(legend.position = "none")
p2 <- ggplot(data, aes(x = x, y = y)) +
  stat_density_2d(aes(fill = after_stat(density)), geom = "raster", contour = FALSE) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_viridis() +
  theme(legend.position = "none")
p1 + p2
library(plotly)
library(MASS)
# Compute kde2d
kd <- with(data, MASS::kde2d(x, y, n = 50))
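# kd is computed above but never rendered; a minimal sketch of an interactive
# surface from the kde2d output (plot_ly()/add_surface() are standard plotly calls):
plot_ly(x = kd$x, y = kd$y, z = kd$z) %>%
  add_surface()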
#Node-Link Diagram
#https://krisrs1128.github.io/stat479/
library("dplyr")
library("knitr")
library("ggplot2")
#install.packages('ggraph')
library("ggraph")
library("gridExtra")
#install.packages("networkD3")
library("networkD3")
library("tidygraph")
theme_set(theme_graph())
#A node-link diagram
G_school <- as_tbl_graph(highschool) %>%
  activate(edges) %>%
  mutate(year = factor(year))
ggraph(G_school) +
  geom_edge_link(aes(col = year), width = 0.1) +
  geom_node_point()
#For Tree
G_flare <- tbl_graph(flare$vertices, flare$edges)
p1 <- ggraph(G_flare, 'tree') +
  geom_edge_link() +
  geom_node_label(aes(label = shortName), size = 3)
p1 # display the tree
############################################
#Interactive network with networkD3 (D3.js)
school_edges <- G_school %>%
activate(edges) %>%
as.data.frame()
simpleNetwork(school_edges)
############################################
#Shiny
#ui.R
library(shiny)
library(shinydashboard)
shinyUI(
  pageWithSidebar(
    headerPanel("My First App"),
    sidebarPanel(
      selectInput("Distribution",
                  "Please Select Distribution Type",
                  choices = c("Normal", "Exponential")),
      sliderInput("sampleSize",
                  "Please Select Sample Size",
                  min = 100, max = 5000, value = 1000, step = 100),
      conditionalPanel(condition = "input.Distribution == 'Normal'",
                       textInput("mean", "Please Select the Mean:", 10),
                       textInput("sd", "Please Select Std. Deviation:", 3)),
      conditionalPanel(condition = "input.Distribution == 'Exponential'",
                       textInput("lambda", "Please Select Exponential Lambda:", 1))
    ),
    mainPanel(
      plotOutput("myPlot")
    )
  )
)
#server.R
shinyServer(
  function(input, output, session) {
    output$myPlot <- renderPlot({
      distType <- input$Distribution
      size <- input$sampleSize
      if (distType == "Normal") {
        randomVec <- rnorm(size, mean = as.numeric(input$mean),
                           sd = as.numeric(input$sd))
      } else {
        randomVec <- rexp(size, rate = 1 / as.numeric(input$lambda))
      }
      hist(randomVec, col = "blue")
    })
  }
)
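# With ui.R and server.R saved in the same app directory, launch the app with
# runApp("<app-directory>") or the Run App button in RStudio.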
############################################
#igraph + visNetwork
#install.packages('igraph')
#install.packages('visNetwork')
library(igraph)
library(visNetwork)
nnodes <- 100
nnedges <- 200
# nodes/edges were missing here; this random graph follows the standard
# visNetwork example (edges drawn uniformly at random)
nodes <- data.frame(id = 1:nnodes)
edges <- data.frame(from = sample(1:nnodes, nnedges, replace = TRUE),
                    to = sample(1:nnodes, nnedges, replace = TRUE))
# Circular layout via igraph
visNetwork(nodes, edges, height = "500px") %>%
  visIgraphLayout(layout = "layout_in_circle") %>%
  visNodes(size = 10) %>%
  visOptions(highlightNearest = list(enabled = TRUE, hover = TRUE),
             nodesIdSelection = TRUE)
#================
sidebarLayout(
  sidebarPanel(),
  mainPanel(
    plotOutput(outputId = "hist_plot")
  )
)
############################################
#Heat Map
# Dummy data
data <- matrix(runif(100, 0, 5), 10, 10)
colnames(data) <- letters[1:10]
rownames(data) <- paste(rep("row", 10), 1:10, sep = " ")
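# The matrix above is built but never displayed at this point; a minimal
# base-R call would draw it (this line is an addition, not in the original):
heatmap(data)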
#Heat Map
# Library
library(ggplot2)
# Dummy data
x <- LETTERS[1:20]
y <- paste0("var", seq(1,20))
data <- expand.grid(X=x, Y=y)
data$Z <- runif(400, 0, 5)
ggplot(data, aes(X, Y, fill= Z)) +
geom_tile()
#Heatmap - Viridis
# Lattice package
require(lattice)
# 2: Rcolorbrewer palette
library(RColorBrewer)
coul <- colorRampPalette(brewer.pal(8, "PiYG"))(25)
levelplot(volcano, col.regions = coul) # try cm.colors() or terrain.colors()
# 3: Viridis
library(viridisLite)
coul <- viridis(100)
levelplot(volcano, col.regions = coul)
#levelplot(volcano, col.regions = magma(100))
#Heatmap - mtcars
# The mtcars dataset:
data <- as.matrix(mtcars)
# Default Heatmap
heatmap(data)
#mtcars - Normalization
# Use 'scale' to normalize
heatmap(data, scale="column")
############################################
#Data Cleaning
data <- data.frame(x1 = c(1:4, 99999, 1, NA, 1, 1, NA), # Create example data frame
                   x2 = c(1:5, 1, "NA", 1, 1, "NA"),
                   x3 = c(letters[1:3], "x x", "x", " y y y", "x", "a", "a", NA),
                   x4 = "",
                   x5 = NA)
data # Print example data frame
#Example 1: Modify Column Names
colnames(data) # Print column names
colnames(data) <- paste0("col", 1:ncol(data)) # Modify all column names
data # Print updated data frame
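# The example stops after renaming columns; a plausible next cleaning step
# (an assumption, not shown above) is converting sentinel values to real NAs:
data_clean <- data
data_clean[] <- lapply(data_clean,
                       function(col) replace(col, col %in% c("NA", ""), NA))
data_clean$col1[data_clean$col1 %in% 99999] <- NA # drop the 99999 outlier
data_clean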
############################################
#Time-Series
data(AirPassengers)
AirPassengers
str(AirPassengers)
class(AirPassengers)
#Check for missing values
sum(is.na(AirPassengers))
start(AirPassengers)
end(AirPassengers)
frequency(AirPassengers)
summary(AirPassengers)
par(mfrow=c(3,3))
plot(AirPassengers)
plot.ts(AirPassengers)
# Fit a regression (trend) line through the series
abline(reg=lm(AirPassengers~time(AirPassengers)))
#This will print the cycle across years
cycle(AirPassengers)
plot(diff(log(AirPassengers)))
#Time Series Decomposition
#Decomposition breaks the data into trend, seasonal, and random components
plot(decompose(AirPassengers)) # time series decomposition
#The figure shows the decomposition into trend, seasonal, and random (noise)
#components. The series is clearly non-stationary: it has both seasonal
#effects and a (roughly linear) trend.
pacf(diff(log(AirPassengers)))
#The PACF is used to choose the AR order p (here it suggests p = 0)
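#'fit', 'data11', and 'data22' are used below but never defined in this
#excerpt; a plausible reconstruction following the usual AirPassengers
#ARIMA workflow (the exact model choice is an assumption):
fit <- arima(log(AirPassengers), c(0, 1, 1),
             seasonal = list(order = c(0, 1, 1), period = 12))
pred <- predict(fit, n.ahead = 10 * 12)
data11 <- AirPassengers  # observed series
data22 <- exp(pred$pred) # forecasts back-transformed from the log scale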
#Plot the observed series against the predicted values
plot(data11, col = "red", type = "l")
lines(data22, col = "blue")
#Step 7: Check normality of the residuals using a Q-Q plot
#qqnorm is a generic function whose default method produces a normal Q-Q plot
#of the values in y. qqline adds a line to a "theoretical" (by default normal)
#quantile-quantile plot which passes through the probs quantiles, by default
#the first and third quartiles.
qqnorm(residuals(fit))
qqline(residuals(fit))
############################################
get_sentiments("nrc") %>%
count(sentiment) # Count words by sentiment
#Geocoded Tweets
#The geocoded_tweets dataset contains three columns:
#1. state, a state in the United States
#2. word, a word used in tweets posted on Twitter
#3. freq, the average frequency of that word in that state (per billion words)
#Test
#Access nrc lexicon: nrc
nrc <- get_sentiments("nrc")
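#tweets_nrc is used below but not built in this excerpt; presumably it joins
#the geocoded tweets with the nrc lexicon (this reconstruction is an assumption):
tweets_nrc <- geocoded_tweets %>%
  inner_join(nrc, by = "word")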
head(tweets_nrc)
tweets_nrc %>%
  # Filter to only choose the words associated with sadness
  filter(sentiment == "sadness") %>%
  # Group by word
  group_by(word) %>%
  # Use the summarize verb to find the mean frequency
  summarize(freq = mean(freq)) %>%
  # Arrange to sort in order of descending frequency
  arrange(desc(freq))
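#joy_words below mirrors the sadness pipeline; it is not defined in this
#excerpt, so this reconstruction is an assumption:
joy_words <- tweets_nrc %>%
  filter(sentiment == "joy") %>%
  group_by(word) %>%
  summarize(freq = mean(freq)) %>%
  arrange(desc(freq))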
joy_words %>%
  top_n(20) %>%
  mutate(word = reorder(word, freq)) %>%
  # Use aes() to put words on the x-axis and frequency on the y-axis
  ggplot(aes(x = word, y = freq)) +
  # Make a bar chart with geom_col()
  geom_col() +
  coord_flip()
#install.packages('wordcloud2')
library(wordcloud2)
wordcloud2(joy_words, size = 1.5,
color='random-dark')
#Shape options for wordcloud2: circle, cardioid, diamond, triangle-forward,
#triangle, pentagon, star
############################################
# Calculate means and standard errors by sex, rank, and discipline
library(dplyr)
library(carData) # the Salaries dataset lives here
plotdata <- Salaries %>%
  group_by(sex, rank, discipline) %>%
  summarize(n = n(),
            mean = mean(salary),
            sd = sd(salary),
            se = sd / sqrt(n))
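#plotdata is summarized but never plotted here; a sketch of a typical display
#(mean salary with +/- 1 SE error bars; the dodging and faceting are assumptions):
ggplot(plotdata, aes(x = rank, y = mean, group = sex, color = sex)) +
  geom_point(size = 3, position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se),
                width = 0.1, position = position_dodge(0.2)) +
  facet_wrap(~ discipline)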
###########################################
#library(MASS)
#install.packages("ISLR")
#library(ISLR)
#fix(Boston)
names(Boston)
head(Boston)
Bostondf <- data.frame(lstat = Boston$lstat,
                       age = Boston$age,
                       medv = Boston$medv)
plot(Bostondf)
pairs(Bostondf)
#install.packages("scatterplot3d") # Install
library("scatterplot3d")
scatterplot3d(Bostondf, angle=30)
#Parallel Coordinates
# Libraries
library(tidyverse)
library(hrbrthemes)
library(patchwork)
#install.packages('GGally')
library(GGally)
library(viridis)
diamonds %>%
  sample_n(10) %>%
  ggparcoord(columns = c(1, 5:7),
             groupColumn = 2,
             #order = "anyClass",
             showPoints = TRUE,
             title = "Diamonds features",
             alphaLines = 0.3) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(plot.title = element_text(size = 10))
# Plot (iris; 'data' is not defined for this plot above, so the assignment
# below is an assumption matching the plot title)
data <- iris
data %>%
  ggparcoord(columns = 1:4, groupColumn = 5, order = "anyClass",
             showPoints = TRUE,
             title = "Parallel Coordinate Plot for the Iris Data",
             alphaLines = 0.3) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(plot.title = element_text(size = 10))
# Plot: no scaling
p1 <- data %>%
  ggparcoord(columns = 1:4, groupColumn = 5, order = "anyClass",
             scale = "globalminmax",
             showPoints = TRUE,
             title = "No scaling",
             alphaLines = 0.3) +
  scale_color_viridis(discrete = TRUE) +
  theme_ipsum() +
  theme(legend.position = "none",
        plot.title = element_text(size = 10)) +
  xlab("")
# p2-p4 are not built in this excerpt; presumably they are the same plot with
# the other scale options (e.g., "uniminmax", "std", "center")
p1 + p2 + p3 + p4 + plot_layout(ncol = 2)
############################################
###########Tidying Shakespearean plays
#The shakespeare dataset contains 3 columns:
#1. title, the title of a Shakespearean play
#2. type, the type of play, either tragedy or comedy
#3. text, a line from that play
#5. Pipe the tidy Shakespeare data frame to the next line
#   Use count to find out how many times each word is used
#7. shakespeare_sentiment
#   Find how many positive/negative words each play has
#10. Most common positive and negative words; assign to word_counts
#   Implement sentiment analysis using the "bing" lexicon
#   Count by word and sentiment
#11. Extract top 10 words from word_counts and assign to top_words
#   Group by sentiment
#   Take the top 10 for each sentiment and ungroup it
#   Make word a factor in order of n
#12. Use aes() to put words on the x-axis and n on the y-axis
#   Make a bar chart with geom_col()
#   facet_wrap for sentiments and apply scales as free
#   Move x to y and y to x
#A sketch of these steps appears below.
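#A minimal sketch of the steps above, assuming a 'shakespeare' data frame
#with the title/type/text columns described (the object name is an assumption):
library(tidytext)
tidy_shakespeare <- shakespeare %>%
  unnest_tokens(word, text)
tidy_shakespeare %>%
  count(word, sort = TRUE) # step 5: how often each word is used
shakespeare_sentiment <- tidy_shakespeare %>%
  inner_join(get_sentiments("bing"), by = "word") # step 7
shakespeare_sentiment %>%
  count(title, sentiment) # positive/negative words per play
word_counts <- shakespeare_sentiment %>%
  count(word, sentiment, sort = TRUE) # step 10
top_words <- word_counts %>%
  group_by(sentiment) %>%
  top_n(10) %>%
  ungroup() %>%
  mutate(word = reorder(word, n)) # step 11
ggplot(top_words, aes(x = word, y = n, fill = sentiment)) + # step 12
  geom_col(show.legend = FALSE) +
  facet_wrap(~ sentiment, scales = "free") +
  coord_flip() # move x to y and y to x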