# Assignment-6
library(dplyr)
#Q1)
# Build a 20-row synthetic country-level dataset with one row per country.
set.seed(123)  # fixed seed so the generated values are reproducible

# Continent of every country that can be drawn, keyed by country name.
# The order of the names matches the original country vector so that the
# shuffle below consumes the random stream exactly as before.
# Russia and Turkey span two continents; they are assigned to Europe and
# Asia respectively here.
country_continent <- c(
  USA = "North America", Germany = "Europe", India = "Asia",
  France = "Europe", Japan = "Asia", China = "Asia",
  Brazil = "South America", UK = "Europe", Canada = "North America",
  Russia = "Europe", Italy = "Europe", "South Africa" = "Africa",
  Australia = "Australia", Spain = "Europe", Mexico = "North America",
  Nigeria = "Africa", Argentina = "South America", Egypt = "Africa",
  Turkey = "Asia", Sweden = "Europe"
)

# Shuffle the countries first (the first random draw, as before), then look
# the continent up by name. Filling Continent by row position would pair the
# shuffled countries with unrelated continents.
shuffled_countries <- sample(
  names(country_continent), length(country_continent), replace = FALSE
)

data <- data.frame(
  Country = shuffled_countries,
  Continent = unname(country_continent[shuffled_countries]),
  Year = sample(2000:2020, 20, replace = TRUE),
  LifeExp = runif(20, 50, 85),
  Pop = sample(1e6:1e8, 20, replace = TRUE),
  gdpPerc = runif(20, 1000, 50000)
)
print(data)
# 1. Number of distinct countries recorded for each continent
data %>%
  group_by(Continent) %>%
  summarise(UniqueCountries = length(unique(Country)))
# 2. European nation with the lowest GDP per capita
# NOTE(review): the task wording mentions "a given year", but no Year filter
# is applied; the minimum is taken over all European rows. Confirm intent.
data %>%
  filter(Continent == "Europe") %>%
  arrange(gdpPerc) %>%
  slice_head(n = 1)
# 4. Five countries with the largest total GDP summed over all their rows.
# Total GDP for a row is population times GDP per capita.
data %>%
  group_by(Country) %>%
  summarise(TotalGDP = sum(Pop * gdpPerc)) %>%
  arrange(desc(TotalGDP)) %>%
  slice_head(n = 5)
# 6. Top 10 countries with the strongest correlation (by absolute value)
# between life expectancy and GDP per capita.
data %>%
  group_by(Country) %>%
  # A correlation needs at least two observations; skip smaller groups
  # instead of letting undefined values reach the ranking.
  filter(n() >= 2) %>%
  summarise(Correlation = cor(LifeExp, gdpPerc)) %>%
  # cor() is also NA when either variable is constant within a country.
  # Drop those rows so they cannot occupy a "top 10" position.
  filter(!is.na(Correlation)) %>%
  arrange(desc(abs(Correlation))) %>%
  slice_head(n = 10)
# 7. Continent/year combination (excluding Asia) with the highest average
# population.
data %>%
  filter(Continent != "Asia") %>%
  group_by(Continent, Year) %>%
  # Drop all grouping after summarising. By default the result stays grouped
  # by Continent, and the final slice would then return one row per continent
  # rather than the single highest combination overall.
  summarise(AveragePop = mean(Pop, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(AveragePop)) %>%
  slice_head(n = 1)
# 9. Rows where, relative to the same country's previous record (ordered by
# year), population fell while life expectancy rose.
data %>%
  arrange(Country, Year) %>%
  group_by(Country) %>%
  mutate(
    # First record of each country has no predecessor, hence the leading NA.
    PopChange = c(NA, diff(Pop)),
    LifeExpChange = c(NA, diff(LifeExp))
  ) %>%
  filter(PopChange < 0, LifeExpChange > 0)
#Q2)
# Build a 10-row medicine inventory table. The CSV export at the end is left
# commented out, so nothing is written to disk.
med_data <- data.frame(
  MedID = seq_len(10),
  Med_Name = sprintf("Med%d", 1:10),
  Company = c(rep_len(c("A", "B", "C"), 7), "D", "E", "F"),
  Manf_year = sample(2000:2020, 10, replace = TRUE),
  # Expiry dates are drawn as day offsets from the Unix epoch.
  Exp_date = as.Date("1970-01-01") + sample(18250:18500, 10, replace = TRUE),
  Quantity_in_stock = sample(50:500, 10),
  Sales = sample(1000:5000, 10)
)
med_data
# write.csv(med_data, "DataSet.csv", row.names = FALSE)
# 7. Medicines whose expiry date is earlier than today's date
filter(med_data, Sys.Date() > Exp_date)