Numpy
Numpy
print(homelessness.shape)
# Print a description of homelessness
print(homelessness.describe())

2) Parts of a DataFrame
print(homelessness.columns)

4) Subsetting columns
# Select the individuals column
individuals = homelessness["individuals"]
# Print the head of the result
# Select only the individuals and state columns, in that order
# A custom IQR function
def iqr(column):
    return column.quantile(0.75) - column.quantile(0.25)

# Print IQR of the temperature_c column
print(sales["temperature_c"].agg(iqr))

Remove rows of sales with duplicate pairs of store and type and save as store_types and print the head.
Remove rows of sales with duplicate pairs of store and department and save as store_depts and print the head.
# A custom IQR function
def iqr(column):
    return column.quantile(0.75) - column.quantile(0.25)

# Update to print IQR of temperature_c, fuel_price_usd_per_l, & unemployment
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg(iqr))

# Import NumPy and create custom IQR function
import numpy as np

def iqr(column):
    return column.quantile(0.75) - column.quantile(0.25)

# Drop duplicate store/type combinations
store_types = sales.drop_duplicates(subset=["store", "type"])
print(store_types.head())

# Drop duplicate store/department combinations
store_depts = sales.drop_duplicates(subset=["store", "department"])
print(store_depts.head())
# Update to print IQR and median of temperature_c, fuel_price_usd_per_l, & unemployment
print(sales[["temperature_c", "fuel_price_usd_per_l", "unemployment"]].agg([iqr, np.median]))

# Subset the rows where is_holiday is True and drop duplicate dates
holiday_dates = sales[sales["is_holiday"]].drop_duplicates(subset="date")

# Print date col of holiday_dates
print(holiday_dates["date"])
Slicing index values
# Sort the index of temperatures_ind
temperatures_srt = temperatures_ind.sort_index()

# Subset rows from Pakistan to Russia
print(temperatures_srt.loc["Pakistan":"Russia"])

# Try to subset rows from Lahore to Moscow
print(temperatures_srt.loc["Lahore":"Moscow"])

# Subset rows from Pakistan, Lahore to Russia, Moscow
print(temperatures_srt.loc[("Pakistan", "Lahore"):("Russia", "Moscow")])

# Use .loc[] to subset temperatures_ind for rows from Aug 2010 to Feb 2011
print(temperatures_ind.loc["2010-08":"2011-02"])

Subsetting by row/column number
# Get 23rd row, 2nd column (index 22, 1)
print(temperatures.iloc[22, 1])

# Use slicing to get the first 5 rows
print(temperatures.iloc[:5])

# Use slicing to get columns 3 to 4
print(temperatures.iloc[:, 2:4])
# Subset for Egypt, Cairo to India, Delhi, and 2005 to 2010
temp_by_country_city_vs_year.loc[("Egypt", "Cairo"):("India", "Delhi"), "2005":"2010"]

Calculating on a pivot table
# Get the worldwide mean temp by year
mean_temp_by_year = temp_by_country_city_vs_year.mean()

# Import matplotlib.pyplot with alias plt
import matplotlib.pyplot as plt

# Get the total number of avocados sold on each date
nb_sold_by_date = avocados.groupby("date")["nb_sold"].sum()

# Create a line plot of the number of avocados sold by date
nb_sold_by_date.plot(kind="line")

# Show the plot
plt.show()
# Filter for the year that had the highest mean temp
print(mean_temp_by_year[mean_temp_by_year == mean_temp_by_year.max()])

# Get the mean temp by city
mean_temp_by_city = temp_by_country_city_vs_year.mean(axis="columns")

# Filter for the city that had the lowest mean temp
print(mean_temp_by_city[mean_temp_by_city == mean_temp_by_city.min()])

Avocado supply and demand
# Scatter plot of avg_price vs. nb_sold with title
avocados.plot(x="nb_sold", y="avg_price", kind="scatter", title="Number of avocados sold vs. average price")

# Show the plot
plt.show()
Which avocado size is most popular?
CSV to DataFrame
DataFrame to CSV
# Create airline_totals_sorted
airline_totals_sorted = airline_totals.sort_values("bumps_per_10k", ascending=False)