0% found this document useful (0 votes)
3 views

Assignment-6

Uploaded by

datakushagra
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views

Assignment-6

Uploaded by

datakushagra
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

Assignment - 6

# Load necessary libraries
# Install dplyr only when it is not already available, so that re-running the
# script does not re-download the package on every execution.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Q1 ----
# Create a reproducible 20-row synthetic dataset: one row per country with a
# random year, life expectancy, population and GDP per capita.
set.seed(123)

# Country -> continent lookup. Continents are looked up by country name rather
# than assigned by row position, so the shuffled countries keep a sensible
# continent. The order of the names matches the original sampling vector so
# the random draw is unchanged.
# NOTE(review): Russia and Turkey span two continents; the labels chosen here
# are one reasonable convention -- adjust if the assignment expects otherwise.
continent_of <- c(
  "USA" = "North America",
  "Germany" = "Europe",
  "India" = "Asia",
  "France" = "Europe",
  "Japan" = "Asia",
  "China" = "Asia",
  "Brazil" = "South America",
  "UK" = "Europe",
  "Canada" = "North America",
  "Russia" = "Europe",
  "Italy" = "Europe",
  "South Africa" = "Africa",
  "Australia" = "Australia",
  "Spain" = "Europe",
  "Mexico" = "North America",
  "Nigeria" = "Africa",
  "Argentina" = "South America",
  "Egypt" = "Africa",
  "Turkey" = "Asia",
  "Sweden" = "Europe"
)

# Shuffle the 20 countries (each appears exactly once).
countries <- sample(names(continent_of), 20, replace = FALSE)

data <- data.frame(
  Country = countries,
  Continent = unname(continent_of[countries]),
  Year = sample(2000:2020, 20, replace = TRUE),
  LifeExp = runif(20, 50, 85),
  Pop = sample(1e6:1e8, 20, replace = TRUE),
  gdpPerc = runif(20, 1000, 50000)
)
print(data)
# 1. Number of distinct countries recorded for each continent
summarise(
  group_by(data, Continent),
  UniqueCountries = n_distinct(Country)
)

# 2. European row with the smallest GDP per capita
# NOTE(review): the task wording says "in a given year", but no year filter is
# applied here; the minimum is taken over all European rows -- confirm intent.
europe_rows <- filter(data, Continent == "Europe")
slice(arrange(europe_rows, gdpPerc), 1)

# 3. Average life expectancy for every continent/year combination
data %>%
  group_by(Continent, Year) %>%
  summarise(
    AverageLifeExp = mean(LifeExp, na.rm = TRUE),
    # Return an ungrouped result; otherwise the output stays grouped by
    # Continent and summarise() emits a message about the regrouping.
    .groups = "drop"
  )

# 4. Five countries with the largest total GDP summed over all of their rows
# Per-row total GDP is population times GDP per capita.
gdp_rows <- mutate(data, TotalGDP = Pop * gdpPerc)
gdp_by_country <- summarise(
  group_by(gdp_rows, Country),
  TotalGDP = sum(TotalGDP)
)
slice(arrange(gdp_by_country, desc(TotalGDP)), 1:5)

# 5. Country/year rows whose life expectancy is at least 80
select(
  filter(data, LifeExp >= 80),
  Country, Year, LifeExp
)

# 6. Top 10 countries with strongest correlation between life expectancy and
#    GDP per capita (ranked by absolute correlation)
data %>%
  group_by(Country) %>%
  summarise(Correlation = cor(LifeExp, gdpPerc)) %>%
  # A country with too few observations has an undefined (NA) correlation;
  # drop those rows so they cannot pad out the "top 10". With one row per
  # country, as in the dataset built above, this leaves an empty result.
  filter(!is.na(Correlation)) %>%
  arrange(desc(abs(Correlation))) %>%
  slice(1:10)

# 7. The single combination of continent (besides Asia) and year with the
#    highest average population
data %>%
  filter(Continent != "Asia") %>%
  group_by(Continent, Year) %>%
  # Drop all grouping after summarising. Without this the result stays grouped
  # by Continent, and slice(1) would return one row per continent instead of
  # the single overall maximum.
  summarise(AveragePop = mean(Pop, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(AveragePop)) %>%
  slice(1)

# 8. Three countries with the most consistent population estimates
#    (smallest standard deviation of Pop)
data %>%
  group_by(Country) %>%
  summarise(PopSD = sd(Pop)) %>%
  # sd() of a single observation is NA; such countries have no measurable
  # spread, so exclude them rather than reporting NA rows as "most consistent".
  filter(!is.na(PopSD)) %>%
  arrange(PopSD) %>%
  slice(1:3)

# 9. Rows where population fell and life expectancy rose relative to the
#    country's preceding row (rows ordered by Year within each Country)
ordered_rows <- group_by(arrange(data, Country, Year), Country)
with_changes <- mutate(
  ordered_rows,
  PopChange = Pop - lag(Pop),
  LifeExpChange = LifeExp - lag(LifeExp)
)
# The first row of each country has NA changes and is dropped by filter().
filter(with_changes, PopChange < 0, LifeExpChange > 0)

# Q2 ----
# Create the medicine dataset: 10 medicines with company, manufacturing year,
# expiry date, stock quantity and sales.
med_data <- data.frame(
  MedID = 1:10,
  Med_Name = paste0("Med", 1:10),
  Company = c("A", "B", "C", "A", "B", "C", "A", "D", "E", "F"),
  Manf_year = sample(2000:2020, 10, replace = TRUE),
  # Day counts since 1970-01-01 converted to Date values.
  Exp_date = as.Date(sample(18250:18500, 10, replace = TRUE), origin = "1970-01-01"),
  Quantity_in_stock = sample(50:500, 10),
  Sales = sample(1000:5000, 10)
)
med_data

# Save the dataset as a CSV file when it does not exist yet, so the read below
# works on a fresh run; an existing DataSet.csv is left untouched.
csv_path <- "DataSet.csv"
if (!file.exists(csv_path)) {
  write.csv(med_data, csv_path, row.names = FALSE)
}

# Read the data file
med_data <- read.csv(csv_path)

# read.csv() returns Exp_date as text; restore the Date class so that later
# numeric conversion and date comparisons work.
# NOTE(review): assumes the file stores dates as YYYY-MM-DD (what write.csv
# produces for Date columns) -- confirm if DataSet.csv comes from elsewhere.
med_data$Exp_date <- as.Date(med_data$Exp_date)

# 1. First four medicine records
head(med_data, n = 4)

# 2. Final four medicine records
tail(med_data, n = 4)

# 3. Correlation between Quantity_in_stock and Exp_date
# Exp_date is text after read.csv(), and as.numeric() on date strings yields
# NA. Parse it as a Date first so the correlation is computed on day counts;
# as.Date() leaves a column that is already a Date unchanged.
cor(
  med_data$Quantity_in_stock,
  as.numeric(as.Date(med_data$Exp_date))
)

# 4. Bar graph for Sales with year of manufacturing
# Install ggplot2 only when it is not already available, so re-running the
# script does not reinstall the package each time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# geom_col() draws bar heights directly from the y values; it is the
# documented shorthand for geom_bar(stat = "identity"). Medicines sharing a
# manufacturing year are stacked in the same bar.
ggplot(med_data, aes(x = factor(Manf_year), y = Sales)) +
  geom_col(fill = "blue") +
  labs(
    x = "Manufacturing Year",
    y = "Sales",
    title = "Sales by Manufacturing Year"
  )

# 5. Companies that appear on more than one medicine row
company_counts <- summarise(group_by(med_data, Company), NumMeds = n())
filter(company_counts, NumMeds > 1)

# 6. Distinct medicine names present in the dataset
unique(med_data[["Med_Name"]])

# 7. Medicines whose expiry date is earlier than today's date
filter(med_data, Exp_date < Sys.Date())

# 8. Mean quantity in stock across all medicines
mean(med_data[["Quantity_in_stock"]])

# 9. Regression line between Manufacturing year and Sales
# Scatter plot of Sales against Manf_year with a fitted linear-model line.
ggplot(med_data, aes(x = Manf_year, y = Sales)) +
  geom_point() +
  # State the formula explicitly (the default for method = "lm") so ggplot2
  # does not print its "using formula" message.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    x = "Manufacturing Year",
    y = "Sales",
    # Keep the title on one line: the original string literal was split across
    # two source lines, embedding a line break in the rendered title.
    title = "Regression of Sales vs Manufacturing Year"
  )

You might also like