Parallel Programming in R
//////////////////////////
# Vector of words from all six books
words <- janeausten_words()

# Partitioning: find the most frequent word per starting letter
# (%>% comes from the magrittr package)
result <- lapply(letters, max_frequency,
                 words = words, min_length = 5) %>%
  unlist()

# Barplot of result
barplot(result, las = 2)
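janeausten_words() and max_frequency() are provided by the course; a minimal sketch of what they might look like, using janeaustenr and stringr (names and defaults here are assumptions):

library(janeaustenr)
library(stringr)

janeausten_words <- function() {
  # All six novels, flattened into a lowercase word vector
  text <- austen_books()$text
  tolower(unlist(str_extract_all(text, boundary("word"))))
}

select_words <- function(letter, words, min_length = 1) {
  # Words of at least min_length that start with the given letter
  words[nchar(words) >= min_length & str_starts(words, letter)]
}

max_frequency <- function(letter, words, min_length = 1) {
  # Most frequent qualifying word for this letter
  counts <- table(select_words(letter, words, min_length))
  counts[which.max(counts)]
}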
# Define mean_of_rnorm(): the mean of n random normal numbers
mean_of_rnorm <- function(n) {
  random_numbers <- rnorm(n)
  mean(random_numbers)
}

# Try it out
mean_of_rnorm(100)
/////////////////////////
# From previous step
ar1_multiple_blocks_of_trajectories <- function(ids, ...) {
  # Simulate each block of trajectories
  trajectories_by_block <- lapply(ids, ar1_block_of_trajectories, ...)
  # rbind results into a single matrix
  do.call(rbind, trajectories_by_block)
}

# Show results (trajs is assumed to be the output of a call to
# ar1_multiple_blocks_of_trajectories())
show_migration(trajs)
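ar1_block_of_trajectories() and show_migration() are course-provided helpers. For orientation, a hedged sketch of the kind of AR(1) simulation one block might run per row of parameter estimates (the helper name and the parameter names phi, mu, sigma are hypothetical):

ar1_one_trajectory <- function(est, len = 50) {
  # est: named vector of AR(1) estimates (phi, mu, sigma)
  x <- numeric(len)
  x[1] <- est["mu"]
  for (t in 2:len) {
    x[t] <- est["mu"] + est["phi"] * (x[t - 1] - est["mu"]) +
      rnorm(1, sd = est["sigma"])
  }
  x
}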
# Create a cluster of ncores workers (makeCluster() is in the
# parallel package; e.g. ncores <- detectCores() - 1)
library(parallel)
cl <- makeCluster(ncores)
Sum in parallel
# Evaluate partial sums in parallel: each worker sums a block of 50
part_sums <- clusterApply(cl, x = c(1, 51),
                          fun = function(x) sum(x:(x + 49)))

# Total sum: equals sum(1:100), i.e. 5050
total <- sum(unlist(part_sums))
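The same pattern generalizes to any number of workers; a sketch using splitIndices() from parallel to chunk 1:100 across the whole cluster:

# One index chunk per worker; summing the chunks sums 1:100
chunks <- splitIndices(100, length(cl))
total2 <- sum(unlist(clusterApply(cl, chunks, sum)))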
Benchmarking setup
# Wrap this code into a function
mean_of_rnorm_sequentially <- function(n_numbers_per_replicate, n_replicates) {
  n <- rep(n_numbers_per_replicate, n_replicates)
  lapply(n, mean_of_rnorm)
}

# Call it to try it
mean_of_rnorm_sequentially(1000, 5)
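The benchmark below also calls mean_of_rnorm_in_parallel(), which is not shown in this extract; a minimal sketch using clusterApply() (the cluster setup here is an assumption):

mean_of_rnorm_in_parallel <- function(n_numbers_per_replicate, n_replicates) {
  n <- rep(n_numbers_per_replicate, n_replicates)
  # Spin up a cluster for the duration of the call
  cl <- makeCluster(detectCores() - 1)
  on.exit(stopCluster(cl))
  clusterApply(cl, n, mean_of_rnorm)
}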
# Run a microbenchmark (microbenchmark package): one run of each
# version, reported in seconds
microbenchmark(
  mean_of_rnorm_sequentially(n_numbers_per_replicate, n_replicates),
  mean_of_rnorm_in_parallel(n_numbers_per_replicate, n_replicates),
  times = 1,
  unit = "s"
)
///////////////////////
# Parameters
n_numbers_per_replicate <- 1000
n_replicates <- 20

# Later step: change both to 100
n_numbers_per_replicate <- 100
n_replicates <- 100
# From previous step: myrdnorm() draws n numbers from the discrete
# normal distribution (rdnorm() in the extraDistr package), reading
# mean and sd from the global environment (a global mean is assumed
# to have been set earlier)
myrdnorm <- function(n) {
  rdnorm(n, mean = mean, sd = sd)
}

# Set sd to 5, globally
sd <- 5

# Generate 1000 numbers with myrdnorm()
myrdnorm(1000)
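On a PSOCK cluster the workers do not share the master's global environment, so myrdnorm() and its globals must be shipped over first; a sketch (assumes a global mean has been set):

# Load extraDistr on the workers, then send myrdnorm() and its globals
clusterEvalQ(cl, library(extraDistr))
clusterExport(cl, c("myrdnorm", "mean", "sd"))
res <- clusterApply(cl, rep(1000, 4), myrdnorm)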
Alternative chunking
# Split the rows of ar1est into 5 roughly equal chunks
# (splitIndices() is in the parallel package)
ind <- splitIndices(nrow(ar1est), 5)
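One way the chunks might then be consumed, handing each block of rows of ar1est to a worker (a hedged sketch; the per-block computation is an assumption):

res <- clusterApply(cl, ind,
                    function(rows) colMeans(ar1est[rows, , drop = FALSE]))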
# Plot results
barplot(result, las = 2)
Using doParallel
# Register doParallel with 3 cores (doParallel package)
registerDoParallel(cores = 3)

# foreach() %dopar% loop: 100 replicates of 1000 draws each
res <- foreach(r = rep(1000, 100), .combine = rbind,
               .packages = "extraDistr") %dopar%
  myrdnorm(r)

# Dimensions of res: 100 rows (replicates) x 1000 columns (draws)
dim_res <- dim(res)
# doParallel version (the function header was missing from this
# extract and is reconstructed here)
freq_doPar <- function(cores, min_length = 5) {
  # Register the doParallel backend
  registerDoParallel(cores = cores)
  # foreach loop: PSOCK workers need explicit exports and packages
  foreach(let = chars, .combine = c,
          .export = c("max_frequency", "select_words", "words"),
          .packages = c("janeaustenr", "stringr")) %dopar%
    max_frequency(let, words = words, min_length = min_length)
}

# Run on 2 cores
freq_doPar(2)
# doFuture version (header reconstructed; the multisession plan is an
# assumption): futures export globals and packages automatically
freq_doFut <- function(cores, min_length = 5) {
  registerDoFuture()
  plan(multisession, workers = cores)
  foreach(let = chars, .combine = c) %dopar%
    max_frequency(let, words = words, min_length = min_length)
}
///////////////////
# Benchmark the sequential, doParallel, and doFuture versions
microbenchmark(freq_seq(min_length),
               freq_doPar(cores, min_length),
               freq_doFut(cores, min_length),
               times = 1)
# Extract words
words <- extract_words_from_text(obama_speech)

# Plot results (res is assumed to come from a frequency computation
# over these words, e.g. freq_fapply(words, chars))
barplot(res, las = 2)
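extract_words_from_text() is a course helper; a minimal sketch using stringr (name and behavior are assumptions):

library(stringr)

extract_words_from_text <- function(text) {
  # Split the text into a lowercase word vector
  tolower(unlist(str_extract_all(text, boundary("word"))))
}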
Planning future
# multicore function (forked workers; not available on Windows)
fapply_mc <- function(cores = 2, ...) {
  # future plan
  plan(multicore, workers = cores)
  freq_fapply(words, chars, ...)
}

# cluster function
fapply_cl <- function(cores = NULL, ...) {
  # Default: 16 workers on each of the two hosts
  if (is.null(cores))
    cores <- rep(c("oisin", "oscar"), each = 16)
  # future plan
  plan(cluster, workers = cores)
  freq_fapply(words, chars, ...)
}
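freq_fapply() itself is provided by the course; a minimal sketch using future.apply (the name and defaults are assumptions):

library(future.apply)

freq_fapply <- function(words, chars, min_length = 5) {
  # future_sapply() runs under whatever plan() is currently set
  future_sapply(chars, max_frequency,
                words = words, min_length = min_length)
}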
Benchmark future
# Microbenchmark the sequential, multicore, and cluster plans
microbenchmark(fapply_seq = fapply_seq(),
               fapply_mc_2 = fapply_mc(cores = 2),
               fapply_mc_10 = fapply_mc(cores = 10),
               fapply_cl = fapply_cl(cores = 2),
               times = 1)
//////////////////////
Load balancing
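tasktime and bias_tasktime below are course-provided vectors of task durations in seconds; illustrative stand-ins (the values are assumptions):

# Unequal task durations, alternating short and long
tasktime <- rep(c(0.1, 0.5), times = 5)
# Durations that grow over the sequence
bias_tasktime <- seq(0.1, 1, length.out = 10)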
# Benchmark clusterApply() against the load-balanced clusterApplyLB()
microbenchmark(
  clusterApply(cl, tasktime, Sys.sleep),
  clusterApplyLB(cl, tasktime, Sys.sleep),
  times = 1
)

# Plot cluster usage (course-provided plotting helpers)
plot_cluster_apply(cl, tasktime, Sys.sleep)
plot_cluster_applyLB(cl, tasktime, Sys.sleep)
Scheduling
# Plot cluster usage for parSapply()
plot_parSapply(cl, tasktime, Sys.sleep)

# With biased task times, compare dynamic load balancing
# (clusterApplyLB) against parSapply()'s upfront chunking
microbenchmark(
  clusterApplyLB(cl, bias_tasktime, Sys.sleep),
  parSapply(cl, bias_tasktime, Sys.sleep),
  times = 1
)
# Note: with a cluster-based backend, set.seed() on the master does
# not control the workers' RNG streams, so replicates need not match
replicate(
  # Use 2 replicates
  n = 2,
  expr = {
    # Set the seed to 100
    set.seed(100)
    # Run two iterations in parallel, bound by rows
    foreach(i = 1:2, .combine = rbind) %dopar% rnorm(3)
  },
  simplify = FALSE
)
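For reproducible %dopar% results, the doRNG package is the usual fix (a sketch; not part of the extract above):

library(doRNG)
# %dorng% gives each iteration its own reproducible RNG stream
set.seed(100)
foreach(i = 1:2, .combine = rbind) %dorng% rnorm(3)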
///////////////////////
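registerDoParallel() below needs an existing FORK cluster; a sketch of how cl.fork might have been created (FORK clusters are Unix-only and share the master's memory at fork time):

# Fork-based cluster with 2 workers
cl.fork <- makeCluster(2, type = "FORK")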
# Change this to register the FORK cluster
registerDoParallel(cl.fork)
Setting an RNG
# Create a cluster
cl <- makeCluster(2)
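The RNG-setting call itself is missing from this extract; the standard approach with a parallel cluster (the seed value is illustrative):

# Give the workers reproducible L'Ecuyer RNG streams
clusterSetRNGStream(cl, iseed = 1234)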
t(replicate(
  # Use 3 replicates
  n = 3,
  expr = {
    # Spread across cl, apply mean_of_rnorm() to n_vec
    clusterApply(cl, n_vec, mean_of_rnorm)
  }
))
//////////////////////
# The cluster, and how many numbers to generate per task
cl
n_vec

t(replicate(
  # Use 3 replicates
  n = 3,
  expr = {
    # Spread across cl, apply mean_of_rnorm() to n_vec
    clusterApply(cl, n_vec, mean_of_rnorm)
  }
))
//////////////////////
# Make a cluster of size 2
cl2 <- makeCluster(2)
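When a cluster is no longer needed, shut its workers down (this applies to every cluster created above):

# Stop the workers and free their resources
stopCluster(cl2)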