0% found this document useful (0 votes)
6 views

Assignment-6

Uploaded by

datakushagra
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views

Assignment-6

Uploaded by

datakushagra
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 5

Assignment - 6

# Load necessary libraries
# Install dplyr only when it is missing, so re-running the script does not
# reinstall the package (and require network access) on every run.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

#Q1)
# Create the dataset: one synthetic record per country.
set.seed(123)  # fixed seed so the random draws below are reproducible

# Country -> continent lookup. The continent has to be derived from the
# country rather than assigned by row position, because sample() shuffles the
# country order. Russia and Turkey span two continents; both are filed under
# Europe here.
continent_of <- c(
  USA = "North America", Canada = "North America", Mexico = "North America",
  Germany = "Europe", France = "Europe", UK = "Europe", Russia = "Europe",
  Italy = "Europe", Spain = "Europe", Sweden = "Europe", Turkey = "Europe",
  India = "Asia", Japan = "Asia", China = "Asia",
  `South Africa` = "Africa", Nigeria = "Africa", Egypt = "Africa",
  Brazil = "South America", Argentina = "South America",
  Australia = "Australia"
)

# Shuffle the 20 countries; replace = FALSE keeps each one exactly once.
# This is the first random draw, so the RNG stream matches the column order
# used in data.frame() below.
countries <- sample(
  c("USA", "Germany", "India", "France", "Japan", "China", "Brazil", "UK",
    "Canada", "Russia",
    "Italy", "South Africa", "Australia", "Spain", "Mexico", "Nigeria",
    "Argentina",
    "Egypt", "Turkey", "Sweden"),
  20,
  replace = FALSE
)

data <- data.frame(
  Country = countries,
  Continent = unname(continent_of[countries]),
  Year = sample(2000:2020, 20, replace = TRUE),
  LifeExp = runif(20, 50, 85),
  Pop = sample(1e6:1e8, 20, replace = TRUE),
  gdpPerc = runif(20, 1000, 50000)
)
print(data)
# 1. Number of distinct countries recorded for each continent
summarise(
  group_by(data, Continent),
  UniqueCountries = n_distinct(Country)
)

# 2. European nation with the lowest GDP per capita
# Keep only European rows, sort ascending by gdpPerc, and take the first row.
slice(
  arrange(
    filter(data, Continent == "Europe"),
    gdpPerc
  ),
  1
)

# 3. Mean life expectancy for every (continent, year) pair
summarise(
  group_by(data, Continent, Year),
  AverageLifeExp = mean(LifeExp, na.rm = TRUE)
)

# 4. Five countries with the largest total GDP summed over all years
# Total GDP of a row is population times GDP per capita.
data %>%
  group_by(Country) %>%
  summarise(TotalGDP = sum(Pop * gdpPerc)) %>%
  arrange(desc(TotalGDP)) %>%
  head(5)

# 5. Country/year records whose life expectancy is at least 80
select(
  filter(data, LifeExp >= 80),
  Country, Year, LifeExp
)

# 6. Top 10 countries with strongest correlation between life expectancy and GDP per capita
# A correlation needs at least two observations; countries with fewer (or
# with an otherwise undefined correlation) are dropped so that NA rows cannot
# appear in the ranking. The result may therefore have fewer than 10 rows.
data %>%
  group_by(Country) %>%
  summarise(
    Correlation = if (n() > 1) cor(LifeExp, gdpPerc) else NA_real_
  ) %>%
  filter(!is.na(Correlation)) %>%
  arrange(desc(abs(Correlation))) %>%  # strength = absolute value
  slice(1:10)

# 7. Combination of continent (besides Asia) and year with the highest average population
data %>%
  filter(Continent != "Asia") %>%
  group_by(Continent, Year) %>%
  summarise(AveragePop = mean(Pop, na.rm = TRUE)) %>%
  # summarise() leaves the result grouped by Continent; without ungroup(),
  # slice(1) would return one row per continent instead of the single top row.
  ungroup() %>%
  arrange(desc(AveragePop)) %>%
  slice(1)

# 8. Three countries with the most consistent population estimates
# Consistency is measured by the standard deviation of Pop (smaller = more
# consistent). sd() of a single value is NA, so countries with only one
# record are dropped rather than ranked arbitrarily; the result may have
# fewer than three rows.
data %>%
  group_by(Country) %>%
  summarise(PopSD = sd(Pop)) %>%
  filter(!is.na(PopSD)) %>%
  arrange(PopSD) %>%
  slice(1:3)

# 9. Records where, relative to the country's previous record, population
#    fell while life expectancy rose
data %>%
  arrange(Country, Year) %>%  # chronological order within each country
  group_by(Country) %>%
  mutate(PopChange = Pop - dplyr::lag(Pop)) %>%
  mutate(LifeExpChange = LifeExp - dplyr::lag(LifeExp)) %>%
  filter(PopChange < 0, LifeExpChange > 0)

#Q2)
# Create the dataset and save it as a CSV file
med_data <- data.frame(
  MedID = 1:10,
  Med_Name = paste0("Med", 1:10),
  Company = c("A", "B", "C", "A", "B", "C", "A", "D", "E", "F"),
  Manf_year = sample(2000:2020, 10, replace = TRUE),
  # Day counts since 1970-01-01, converted to Date
  Exp_date = as.Date(sample(18250:18500, 10, replace = TRUE), origin = "1970-01-01"),
  Quantity_in_stock = sample(50:500, 10),
  Sales = sample(1000:5000, 10)
)
med_data
# Write the file only when it is missing: this guarantees that the read.csv()
# call below has something to read, without overwriting an existing
# DataSet.csv.
if (!file.exists("DataSet.csv")) {
  write.csv(med_data, "DataSet.csv", row.names = FALSE)
}

# Read the data file
med_data <- read.csv("DataSet.csv")

# 1. First four records of the medicine table
head(med_data, n = 4)

# 2. Last four records of the medicine table
tail(med_data, n = 4)

# 3. Correlation between Quantity_in_stock and Exp_date
# read.csv() loads Exp_date as text, and as.numeric() on date strings yields
# NA. Parse to Date first; a Date's numeric value is days since 1970-01-01.
# as.Date() is a no-op if the column is already a Date.
cor(med_data$Quantity_in_stock, as.numeric(as.Date(med_data$Exp_date)))

# 4. Bar graph for Sales with year of manufacturing
# Install ggplot2 only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
# Manf_year is turned into a factor so each year gets its own discrete bar;
# geom_col() plots the Sales values as bar heights (rows sharing a year stack).
ggplot(med_data, aes(x = factor(Manf_year), y = Sales)) +
  geom_col(fill = "blue") +
  labs(x = "Manufacturing Year", y = "Sales", title = "Sales by Manufacturing Year")

# 5. Companies that supply more than one medicine
# Count the rows per company, then keep the companies whose count exceeds one.
filter(
  summarise(group_by(med_data, Company), NumMeds = n()),
  NumMeds > 1
)

# 6. Distinct medicine names present in the table
unique(med_data[["Med_Name"]])

# 7. Medicines whose expiry date is before today
# The date parsing is spelled out; comparing text with a Date performs the
# same as.Date() conversion implicitly.
filter(med_data, as.Date(Exp_date) < Sys.Date())

# 8. Mean quantity in stock across all medicines
mean(med_data[["Quantity_in_stock"]])

# 9. Regression line between Manufacturing year and Sales
# Scatter plot of Sales against Manf_year with a fitted straight line
# (method = "lm") and no confidence band (se = FALSE). The formula is given
# explicitly so geom_smooth() does not print its default-formula message.
ggplot(med_data, aes(x = Manf_year, y = Sales)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(
    x = "Manufacturing Year",
    y = "Sales",
    # Single-line title: the line break inside the original string literal
    # inserted a newline into the rendered title.
    title = "Regression of Sales vs Manufacturing Year"
  )

You might also like