Scalable Data Processing in R
Scalable Data Processing in R
Scalable Data Processing in R
# Create a table of the number of mortgages for each year in the data set
# (one count per distinct value found in the "year" column of mort)
table(mort[, "year"])
# Set the value in the first row and first column of first_three to NA,
# i.e. mark that single cell as missing
first_three[1, 1] <- NA
Split
# Split the row numbers of the mortgage data by year: spl holds, for each
# distinct value of the "year" column, the vector of row indices with that year.
# seq_len() is used rather than 1:nrow(mort) so that an empty data set gives an
# empty index vector instead of c(1, 0).
spl <- split(seq_len(nrow(mort)), mort[, "year"])
Apply
# Apply step: call female_residence_prop() once per element of spl, passing
# the full data plus that element's row indices; results keep spl's names
all_years <- lapply(
  spl,
  function(year_rows) female_residence_prop(mort, year_rows)
)
Combine
# Collect the results as rows in a matrix: Reduce() folds rbind() over
# all_years from left to right, stacking one element at a time
prop_female <- Reduce(rbind, all_years)
# Show the wide-format data
bir_df_wide
# Reshape from wide to long (every column except BIR becomes a Year/Count
# pair), then draw a line plot with one colored line per BIR value
bir_df_wide %>%
  gather(Year, Count, -BIR) %>%
  ggplot(mapping = aes(x = Year, y = Count, group = BIR, color = BIR)) +
  geom_line()
//2
//4
//3
Borrower Race and Ethnicity by Year (II)
# Open the CSV in binary read mode and consume its first line (the header),
# leaving the connection positioned at the line that follows
fc <- file(description = "mortgage-sample.csv", open = "rb")
readLines(con = fc, n = 1)
# Release the connection once done
close(fc)
# View pop_proportion (a bare object name at top level prints its value when
# run interactively)
pop_proportion
# Plot Adjusted_Count against Year, one colored line per Race.
# The `+` has to end the ggplot() line: a line that *starts* with `+` is parsed
# as a separate expression (unary plus applied to geom_line()), which errors
# and leaves the plot without its line layer.
ggplot(rydfl, aes(x = Year, y = Adjusted_Count, group = Race, color = Race)) +
  geom_line()
# Close connection
# NOTE(review): fc is also closed earlier in this file and no reopening is
# visible in between; closing an already-closed connection is an error in R --
# confirm fc was reopened before this point.
close(fc)
# Set the MSA column of df_msa to the labels "rural" and "city"
# (this assigns column values rather than renaming columns; presumably df_msa
# has exactly two rows, in that order -- TODO confirm)
df_msa$MSA <- c("rural", "city")
# Line plot of Count against Year, one colored line per MSA value
ggplot(
  data = df_msa_long,
  mapping = aes(x = Year, y = Count, group = MSA, color = MSA)
) +
  geom_line()
# Print ir_by_fg (a bare object name at top level prints its value when run
# interactively)
ir_by_fg