0% found this document useful (0 votes)
2 views

Introduction to Matplotlib

The document provides a comprehensive overview of data visualization techniques using Python's Matplotlib library, focusing on various types of charts such as bar charts, scatter plots, and histograms. It includes step-by-step instructions for visualizing datasets, particularly Olympic medals and climate change data, while emphasizing the importance of readability and customization in charts. Additionally, it encourages interactive learning by prompting users to create their own visualizations based on the concepts discussed.

Uploaded by

Laís Maranhão
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views

Introduction to Matplotlib

The document provides a comprehensive overview of data visualization techniques using Python's Matplotlib library, focusing on various types of charts such as bar charts, scatter plots, and histograms. It includes step-by-step instructions for visualizing datasets, particularly Olympic medals and climate change data, while emphasizing the importance of readability and customization in charts. Additionally, it encourages interactive learning by prompting users to create their own visualizations based on the concepts discussed.

Uploaded by

Laís Maranhão
Copyright
© All Rights Reserved
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 58

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
plt.show()

seattle_weather["MONTH"]
1 Jan
2 Feb
3 Mar
...
12 Dec
seattle_weather["MLY-TAVG-NORMAL"]
1 42.1
2 43.4
3 46.6
...
12 41.1

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
plt.show()

ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
plt.show()

fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
plt.show()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"])
plt.show()

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], marker="o")


plt.show()

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], marker="v")


plt.show()

fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"], marker="v", linestyle="--")
plt.show()

fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"], marker="v", linestyle="None")
plt.show()
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"], marker="v", linestyle="--", color="r")
plt.show()

ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()

ax.set_title("Weather in Seattle")
plt.show()

fig, ax = plt.subplots(3, 2)
plt.show()

ax[0, 0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], color='b')


plt.show()
fig, ax = plt.subplots(2, 1, sharey=True)
plt.show()

plt

plt

plt.subplots()
Axes.plot()

plt.show()
Axes.plot()

Axes.plot()

plt.show()

Axes.plot()

Axes.plot()

plt.show()

marker

"o"

"v"

linestyle

"--"
"None" linestyle

"r"

set_

set_xlabel

set_ylabel

set_title
subplots

Figure

Axes

ax
plot

sharey

sharey
date,co2,relative_temp
1958-03-06,315.71,0.1
1958-04-06,317.45,0.01
...
2016-12-06,404.45,0.81

DatetimeIndex

climate_change.index
DatetimeIndex(['1958-03-06', '1958-04-06', ..., '2016-12-06'], dtype='datetime64[ns]', name='date', length=706, freq=None)

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

sixties = climate_change["1960-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixties.index, sixties['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

sixty_nine = climate_change["1969-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixty_nine.index, sixty_nine['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.plot(climate_change.index, climate_change["relative_temp"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm) / Relative temperature')
plt.show()
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')

ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"])
ax2.set_ylabel('Relative temperature (Celsius)')
plt.show()

fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"], color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')
ax.tick_params('y', colors='blue')

ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"], color='red')
ax2.set_ylabel('Relative temperature (Celsius)', color='red')
ax2.tick_params('y', colors='red')
plt.show()

def plot_timeseries(axes, x, y, color, xlabel, ylabel):
    """Plot ``y`` against ``x`` on ``axes`` and colour-code the y-axis.

    Args:
        axes: Object to draw on; must provide ``plot``, ``set_xlabel``,
            ``set_ylabel`` and ``tick_params`` (e.g. a Matplotlib Axes).
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        color: Colour applied to the line, the y-axis label and the y ticks.
        xlabel: Text for the x-axis label.
        ylabel: Text for the y-axis label.
    """
    axes.plot(x, y, color=color)
    axes.set_xlabel(xlabel)
    # The label and the ticks reuse the line colour so that, when two
    # y-axes share one figure (see the twinx() calls that use this helper),
    # each axis can be matched to its curve.
    axes.set_ylabel(ylabel, color=color)
    axes.tick_params('y', colors=color)
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'], 'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
plot_timeseries(ax2, climate_change.index, climate_change['relative_temp'], 'red', 'Time', 'Relative temperature (Celsius)')
plt.show()

ax2.annotate(">1 degree", xy=(pd.Timestamp("2015-10-06"), 1))

ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2),
arrowprops={"arrowstyle":"->", "color":"gray"})
index_col
twinx

plot

set_ylabel

tick_params

plot_timeseries def
plot set_xlabel set_ylabel tick_params
annotate

xytext

arrowprops
medals = pd.read_csv('medals_by_country_2016.csv', index_col=0)
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
plt.show()

fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()

fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()

fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
ax.bar(medals.index, medals["Bronze"], bottom=medals["Gold"] + medals["Silver"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"], label="Gold")
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"], label="Silver")
ax.bar(medals.index, medals["Bronze"], bottom=medals["Gold"] + medals["Silver"], label="Bronze")
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
ax.legend()
plt.show()

fig, ax = plt.subplots()
ax.bar("Rowing", mens_rowing["Height"].mean())
ax.bar("Gymnastics", mens_gymnastics["Height"].mean())
ax.set_ylabel("Height (cm)")
plt.show()

fig, ax = plt.subplots()
ax.hist(mens_rowing["Height"])
ax.hist(mens_gymnastics["Height"])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
plt.show()

ax.hist(mens_rowing["Height"], label="Rowing")
ax.hist(mens_gymnastics["Height"], label="Gymnastics")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
ax.hist(mens_rowing["Height"], label="Rowing", bins=5)
ax.hist(mens_gymnastics["Height"], label="Gymnastics", bins=5)
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

ax.hist(mens_rowing["Height"], label="Rowing",
bins=[150, 160, 170, 180, 190, 200, 210])
ax.hist(mens_gymnastics["Height"], label="Gymnastics",
bins=[150, 160, 170, 180, 190, 200, 210])
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()

ax.hist(mens_rowing["Height"], label="Rowing",
bins=[150, 160, 170, 180, 190, 200, 210],
histtype="step")
ax.hist(mens_gymnastics["Height"], label="Gymnastics",
bins=[150, 160, 170, 180, 190, 200, 210],
histtype="step")
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
ax.legend()
plt.show()
fig, ax = plt.subplots()
ax.bar("Rowing", mens_rowing["Height"].mean(), yerr=mens_rowing["Height"].std())
ax.bar("Gymnastics", mens_gymnastics["Height"].mean(), yerr=mens_gymnastics["Height"].std())
ax.set_ylabel("Height (cm)")
plt.show()

fig, ax = plt.subplots()
ax.errorbar(seattle_weather["MONTH"],
seattle_weather["MLY-TAVG-NORMAL"],
yerr=seattle_weather["MLY-TAVG-STDDEV"])
ax.errorbar(austin_weather["MONTH"],
austin_weather["MLY-TAVG-NORMAL"],
yerr=austin_weather["MLY-TAVG-STDDEV"])
ax.set_ylabel("Temperature (Fahrenheit)")
plt.show()

fig, ax = plt.subplots()
ax.boxplot([mens_rowing["Height"], mens_gymnastics["Height"]])
ax.set_xticklabels(["Rowing", "Gymnastics"])
ax.set_ylabel("Height (cm)")
plt.show()

fig, ax = plt.subplots()
ax.scatter(climate_change["co2"], climate_change["relative_temp"])
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()
eighties = climate_change["1980-01-01":"1989-12-31"]
nineties = climate_change["1990-01-01":"1999-12-31"]
fig, ax = plt.subplots()
ax.scatter(eighties["co2"], eighties["relative_temp"],
color="red", label="eighties")
ax.scatter(nineties["co2"], nineties["relative_temp"],
color="blue", label="nineties")
ax.legend()
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()

fig, ax = plt.subplots()
ax.scatter(climate_change["co2"], climate_change["relative_temp"],
c=climate_change.index)
ax.set_xlabel("CO2 (ppm)")
ax.set_ylabel("Relative temperature (Celsius)")
plt.show()

## Quantitative Comparisons: Bar Charts

### Overview of Quantitative Comparisons

> "In this chapter, we will focus on quantitative comparisons between parts of the data."

### Olympic Medals Dataset

- The dataset contains information about the number of medals won by several countries in the **2016 Olympic Games**.
- Though the dataset is small, making comparisons between countries can be challenging without visualization

### Visualizing Olympic Medals Data

1. **Reading the Data**:


- Use **Pandas** to create a DataFrame from a file containing the medal data.
- The first column, containing country names, is set as the index for the DataFrame.

2. **Creating a Bar Chart for Gold Medals**:


- A **Figure** and **Axes** object are created.
- The `Axes.bar()` method is called to generate a bar chart representing the number of gold medals.
- The height of each bar corresponds to the values in the "Gold" column of the DataFrame.
- The x-axis ticks are labeled with country names from the DataFrame index.
### Adjusting the Chart for Readability

- **Rotating Tick Labels**:


- Use the `set_xticklabels()` method to rotate the labels by **90 degrees** for better readability.
- Add a label to the y-axis indicating that the height corresponds to the number of medals.

### Visualizing Other Medals

1. **Adding Silver and Bronze Medals**:


- To display silver and bronze medals in the same plot, create a **stacked bar chart**.
- The process starts similarly to the gold medals:
- Call `bar()` for the "Silver" column and use the `bottom` keyword argument to stack it on top of the gold medals.

2. **Including Bronze Medals**:


- Add another call to `bar()` for the bronze medals, setting the `bottom` to the sum of gold and silver medals.

### Full Stacked Bar Chart

- The final chart displays all three medal categories (Gold, Silver, Bronze) stacked for each country.

### Adding a Legend for Clarity

- To improve the chart's readability:


1. Add the `label` keyword argument to each `bar()` call to specify what each color represents.
2. Use the `Axes.legend()` method before calling `plt.show()` to display a legend indicating which color corresponds to which medal.

### Final Stacked Bar Chart with Legend

- The completed figure includes a legend that clarifies the meaning of each bar color.

### Interactive Component

> "Now, you try!"

This encourages students to apply what they've learned by creating their own bar chart based on the concepts discussed.

## Quantitative Comparisons: Scatter Plots

### Bar Charts vs. Bi-variate Comparisons

> "Bar charts show us the values of one variable across different conditions, such as different countries. But

### Introducing Scatter Plots

- **Definition**: A scatter plot is a standard visualization used for bi-variate comparisons.


- **Example**: Using climate change data:
- One column contains measurements of carbon dioxide.
- Another column contains concurrent measurements of relative temperature.
- **Plot Representation**: Each measurement is represented as a point:
- **X-axis**: Distance representing the carbon dioxide measurement.
- **Y-axis**: Height representing the temperature measurement.

#### Steps to Create a Scatter Plot:

1. Initialize a Figure and Axes objects.


2. Call the Axes scatter method:
- First argument: X-axis data (carbon dioxide).
- Second argument: Y-axis data (temperature).
3. Set axis labels for interpretation.
4. Display the figure using `plt.show()`.

### Customizing Scatter Plots

> "We can customize scatter plots in a manner that is similar to the customization that we introduced in other

- **Objective**: Show two bivariate comparisons side-by-side.


- **Data Selection**: Use time-series indexing to create two DataFrames:
- **DataFrame for the 1980s**:
- Color: Red
- Label: "eighties"
- **DataFrame for the 1990s**:
- Color: Blue
- Label: "nineties"
- **Adding Data to Axes**:
- First, add the 1980s data.
- Then, add the 1990s data.
- **Legend**: Use the legend method to distinguish between the two datasets.
- **Display**: Set axis labels and call `plt.show()`.

### Encoding a Comparison by Color

> "You can see that the relationship between temperatures and carbon dioxide didn't change much during these ye

- **Visual Distinction**: Color can be used effectively for comparison.

### Encoding a Third Variable by Color

- **Extended Comparison**: Color can also represent a third variable.


- **Example**: In the climate change data, time is represented as a continuous variable in the DataFrame index
- **Implementation**:
- Use the `c` keyword argument to encode time as color.
- Note: This is different from the previously used color keyword argument.

### Encoding Time in Color


- **Color Brightness**: The brightness of the color applied to the points indicates the time of measurements
- Dark blue points represent earlier measurements.
- Bright yellow points represent later measurements.

### Practice Making Your Own Scatter Plots!

> "In the exercises, go ahead and practice making your own scatter plots."

## Quantitative Comparisons: Histograms

### Bar Charts vs. Histograms

> "Bar-charts show us the value of a variable in different conditions. Now, we're going to look at histograms.

### Histograms

- Histograms display the distribution of a variable's values.


- Example: Data from the **2016 Olympic Games**:
- Two DataFrames extracted:
- Medal winners in **men's gymnastics**
- Medal winners in **men's rowing**
- Data includes:
- Types of medals won
- Competitor's height and weight

### Bar Chart Comparison

- A bar chart can be used to compare heights.


- Steps to create a bar chart:
1. Create **Figure** and **Axes** objects.
2. Add bars representing the mean heights:
- Rowing "Height" column
- Gymnastics "Height" column
3. Set the **y-axis label** and display the figure.

### Introducing Histograms

- Histograms provide a full distribution of values for each variable.


- Steps to create a histogram:
1. Initialize **Figure** and **Axes**.
2. Call the **Axes hist** method with the "Height" column of:
- Men's rowing DataFrame
- Men's gymnastics DataFrame
- **X-axis**: Values within the variable.
- **Height of bars**: Number of observations in each bin of values.
- Example observations:
- 12 gymnasts between 164 and 167 cm (highest bar = 12 units).
- 20 rowers between 188 and 192 cm (highest bar = 20 units).
### Importance of Labels

> "Because the x-axis label no longer provides information about which color represents which variable, labels

- To label a variable:
- Use the **label** keyword argument in the hist method.
- Call the **legend** method before **plt.show()** to display the legend.

### Customizing Histograms: Setting the Number of Bins

- Default number of bins in a histogram: **10**.


- To customize:
- Provide an integer to the **bins** keyword argument.

### Customizing Histograms: Setting Bin Boundaries

- Provide a sequence of values to set specific boundaries between bins.

### Customizing Histograms: Transparency

- To reduce occlusion of data:


- Change the histogram type from **bar** to **step** using:
- `histtype='step'`
- This displays the histogram as thin lines instead of solid bars.

### Example: Histogram with a Histtype of Step

- Allows clearer visibility of data, confirming there are rowers with a height of less than 180 cm.

### Create Your Own Histogram!

- Upcoming exercises will allow you to create your own histograms.

## Statistical Plotting

### Statistical Plotting Overview

Statistical plotting is a set of methods for using visualization to make comparisons. It allows us to formalize

### Adding Error Bars to Bar Charts

**Error Bars**
Error bars are additional markers on a plot or bar chart that provide information about the distribution of the

- **Purpose:** Summarize the distribution in one number, such as the standard deviation.
- **Example Data:** Heights of medalists in the 2016 Olympic Games.
**Implementation in Bar Chart**
To add error bars to a bar chart, use the `ax.bar` method:
- **Arguments:**
- `x`: Position of the bars
- `y`: Mean of the data (e.g., mean of the "Height" column)
- `yerr`: Standard deviation of the data (e.g., standard deviation of the "Height" column)

### Visualization of Error Bars in Bar Chart

The bar chart with error bars summarizes:


- **Mean Value:** Central tendency of the data
- **Spread of Values:** Quantified as standard deviation

### Adding Error Bars to Plots

Error bars can also be added to line plots using the `Axes.errorbar` method.

**Implementation in Line Plot**


To plot data with error bars:
- **Arguments:**
- `x`: Sequence of x values (e.g., "MONTH" column)
- `y`: Sequence of y values (e.g., column with average monthly temperatures)
- `yerr`: Column containing standard deviations of the average monthly temperatures

### Visualization of Error Bars in Line Plot

This method adds vertical markers to the plot, indicating variability in the data.

### Adding Boxplots

**Boxplot**
A boxplot is a statistical visualization technique invented by John Tukey, known as a pioneer in data science

**Implementation**
To create a boxplot:
- Use the boxplot method of the Axes object.
- Pass a list of sequences (e.g., "Height" columns from men's rowing and gymnastics).
- Label the y-axis separately.

### Interpreting Boxplots

A boxplot displays several key features of the distribution:

- **Median Height:** Indicated by a red line within the box.


- **Inter-Quartile Range (IQR):** The box edges represent the 25th and 75th percentiles.
- **Whiskers:** Extend to one and a half times the size of the IQR beyond the 25th and 75th percentiles,
- **Outliers:** Points beyond the whiskers indicate values that are unusually high or low compared to the majority of the data.
**Example:** In the sample, there are three unusually short rowers and one unusually tall gymnast.

### Exercises

Students are encouraged to create their own statistical visualizations to apply the techniques discussed

### Preparing Your Figures to Share

#### Changing Plot Style


To change the plot style in Matplotlib, use the following code:
```python
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()
```

plt.style.use("ggplot")

plt.style.use("bmh")
plt.style.use("seaborn-colorblind")

fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.set_xticklabels(medals.index, rotation=90)
ax.set_ylabel("Number of medals")
plt.show()

fig.savefig("gold_medals.png")

ls
fig.savefig("gold_medals.jpg")
fig.savefig("gold_medals.jpg", quality=50)
fig.savefig("gold_medals.svg")

fig.savefig("gold_medals.png", dpi=300)

fig.set_size_inches([5, 3])

fig.set_size_inches([3, 5])
sports = summer_2016_medals["Sport"].unique()
print(sports)

fig, ax = plt.subplots()

# Draw one bar per sport: the bar height is the mean of that sport's
# "Height" column and the error bar (yerr) is its standard deviation.
for sport in sports:
    # Keep only the rows whose "Sport" value matches the current sport.
    sport_df = summer_2016_medals[summer_2016_medals["Sport"] == sport]
    ax.bar(sport, sport_df["Height"].mean(),
           yerr=sport_df["Height"].std())

ax.set_ylabel("Height (cm)")
# Rotate the sport names by 90 degrees.
ax.set_xticklabels(sports, rotation=90)
plt.show()

seaborn.relplot(x="horsepower", y="mpg", hue="origin", size="weight",


sizes=(40, 400), alpha=.5, palette="muted",
height=6, data=mpg)

## Preparing Your Figures to Share with Others

### Customization of Figure Styles

> "This chapter will focus on creating visualizations that you can share with others and incorporate into autom

- The appearance of individual elements of a figure can be customized, including:


- Line color
- Marker shapes

### Changing Plot Style

> "To see what that means, let's look at one of the figures we created in a previous lesson."

- The overall style of the figure can be changed to significantly alter its appearance.
- Example: A figure showing average temperatures in Seattle and Austin as a function of the months of the year

### Choosing a Style

> "If instead, we add this line of code before the plotting code, the figure style will look completely differe

- Adding a specific line of code allows for the emulation of styles from other libraries, such as **ggplot**.
- The chosen style affects multiple elements:
- Different colors
- Different fonts
- An added gray background with a faint white grid marking the x-axis and y-axis tick locations
- The selected style applies to all figures in the current session until changed.

### Back to the Default

> "For example, to go back to the default style, you would run plt.style.use('default')."

- Use the command `plt.style.use('default')` to revert to the default plotting style.

### The Available Styles

> "Matplotlib contains implementations of several different styles."

- A range of styles can be explored through a dedicated webpage showcasing visualizations created with each sty

| Style Name | Description |
|----------------------|--------------------------------------------------------------|
| **bmh** | Example of the "bmh" style's appearance |
| **seaborn-colorblind** | Style that enhances visibility for colorblind individuals |

### Seaborn Styles


> "In fact, if you visit the documentation web-page, you will see that there are several available styles that

- Seaborn is a statistical visualization library based on Matplotlib.


- Matplotlib adopts several styles developed in Seaborn, enhancing visualizations.

### Guidelines for Choosing Plotting Style

> "How would you choose which style to use?"

- **Visibility Considerations**:
- Dark backgrounds are typically discouraged unless necessary.

- **Color Considerations**:
- Use colorblind-friendly styles (e.g., **seaborn-colorblind**, **tableau-colorblind10**) to accommodate appr

- **Medium Considerations**:
- Design figures with the intended medium in mind:
- For printed reports, avoid colored backgrounds that consume more ink.
- Consider using the **grayscale** style for black-and-white printers to maintain visual differences

### Practice Choosing the Right Style

> "In the exercises, you'll practice selecting some of these styles for your own visualizations."

## Sharing Your Visualizations

> "After you have created your visualizations, you are ready to share them with your collaborators, colleagues,

### Final Customizations

- Important to finalize customizations to your figures before sharing.

## A Figure to Share

- Example figure displays data about the **number of gold medals** won by various countries in the **2016 Olympic Games**.
- The code previously used `plt.show()` to display the figure on screen.

## Saving the Figure to File

- Replace `plt.show()` with the `Figure` object's `savefig` method.


- Input a file name for the function, e.g., `"gold_medals.png"`.
- The figure will save as a file on your filesystem instead of displaying on the screen.

### Example Command

- Use UNIX command `ls` to verify the saved file in the current working directory.
## Different File Formats

| Format | Compression Type | Use Case | Notes |
|--------------|------------------|---------------------------------------------------|-----------------|
| **PNG** | Lossless | General-purpose and high-quality images | Retains high quality but produces relatively large files |
| **JPG** | Lossy | Images for websites | Adjustable quality via the `quality` keyword argument |
| **SVG** | Vector graphics | Editable images for advanced graphics software | Good for post-production editing of the figure |

## Resolution

- Control image quality using the `dpi` keyword argument (dots per inch).
- Higher `dpi` results in higher quality; for example, setting `dpi = 300` yields high quality.

### File Size Consideration

- Increased resolution leads to larger file sizes.

## Figure Size and Aspect Ratio

- Adjust figure size using `set_size_inches` method.


- Takes a sequence of numbers: first for width, second for height.

### Example Aspect Ratios

- Wide and short


- Long and narrow

## Practice Saving Your Visualizations!

- Exercises will follow to practice saving visualizations as files.

## Automating Figures from Data

### Strengths of Matplotlib

> "One of the strengths of Matplotlib is that, when programmed correctly, it can flexibly adapt to the inputs t

### Why Automate?

- **Flexibility**: Write functions and programs that automatically adjust based on input data.
- **Efficiency**: Automation allows for quicker figure creation compared to traditional graphical user interfaces.
- **Robustness**: Programs can inspect incoming data and change behaviors accordingly.
- **Reproducibility**: Automatic adjustments provide consistent behavior across different runs.

### Understanding Data Variability

#### Different Kinds of Data


- Consider a dataset about **Olympic medal winners**.
- There may be various branches of sports included in new data files, which may not be known in advance.

### Getting Unique Values of a Column

- In a pandas DataFrame, a column is a pandas Series object.


- To identify distinct sports in the data, use the `unique` method:

```python
unique_sports = df['Sport'].unique()

sport

sport_df

bar
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
plt.show()
seattle_weather["MONTH"]
1 Jan
2 Feb
3 Mar
4 Apr
5 May
6 Jun
7 Jul
8 Aug
9 Sep
10 Oct
11 Nov
12 Dec

seattle_weather["MLY-TAVG-NORMAL"]
1 42.1
2 43.4
3 46.6
4 50.5
5 56.0
6 61.0
7 65.9
8 66.5
9 61.6
10 53.3
11 46.2
12 41.1

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
plt.show()

ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
plt.show()

fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
plt.show()

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"])
plt.show()

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], marker="o")


plt.show()

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], marker="v")


plt.show()

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"], marker="v", linestyle="--")


plt.show()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"], marker="v", linestyle="None")
plt.show()

ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"], marker="v", linestyle="--", color="r")


plt.show()

ax.set_xlabel("Time (months)")
plt.show()

ax.set_ylabel("Average temperature (Fahrenheit degrees)")


plt.show()

ax.set_title("Weather in Seattle")
plt.show()

fig, ax = plt.subplots(3, 2)
plt.show()
ax[0, 0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], color='b')
plt.show()

fig, ax = plt.subplots(2, 1)
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-NORMAL"], color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-25PCTL"], linestyle='--', color='b')
ax[0].plot(seattle_weather["MONTH"], seattle_weather["MLY-PRCP-75PCTL"], linestyle='--', color='b')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-NORMAL"], color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-25PCTL"], linestyle='--', color='r')
ax[1].plot(austin_weather["MONTH"], austin_weather["MLY-PRCP-75PCTL"], linestyle='--', color='r')
ax[0].set_ylabel("Precipitation (inches)")
ax[1].set_ylabel("Precipitation (inches)")
ax[1].set_xlabel("Time (months)")
plt.show()

fig, ax = plt.subplots(2, 1, sharey=True)


DatetimeIndex

climate_change.index
DatetimeIndex(['1958-03-06', '1958-04-06', '1958-05-06', ...],
dtype='datetime64[ns]', name='date', length=706, freq=None)

climate_change['relative_temp']
climate_change['co2']

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()
sixties = climate_change["1960-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixties.index, sixties['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

sixty_nine = climate_change["1969-01-01":"1969-12-31"]
fig, ax = plt.subplots()
ax.plot(sixty_nine.index, sixty_nine['co2'])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')
plt.show()

import pandas as pd
climate_change = pd.read_csv('climate_change.csv',
parse_dates=["date"],
index_col="date")

fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.plot(climate_change.index, climate_change["relative_temp"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm) / Relative temperature')
plt.show()
fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"])
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)')

ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"])
ax2.set_ylabel('Relative temperature (Celsius)')
plt.show()

fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"], color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')

ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"], color='red')
ax2.set_ylabel('Relative temperature (Celsius)', color='red')
plt.show()

fig, ax = plt.subplots()
ax.plot(climate_change.index, climate_change["co2"], color='blue')
ax.set_xlabel('Time')
ax.set_ylabel('CO2 (ppm)', color='blue')
ax.tick_params('y', colors='blue')

ax2 = ax.twinx()
ax2.plot(climate_change.index, climate_change["relative_temp"], color='red')
ax2.set_ylabel('Relative temperature (Celsius)', color='red')
ax2.tick_params('y', colors='red')
plt.show()
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],
'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
plot_timeseries(ax2, climate_change.index, climate_change['relative_temp'],
'red', 'Time', 'Relative temperature (Celsius)')
ax2.annotate(">1 degree", xy=(pd.Timestamp("2015-10-06"), 1))
plt.show()

ax2.annotate(">1 degree",
xy=(pd.Timestamp('2015-10-06'), 1),
xytext=(pd.Timestamp('2008-10-06'), -0.2),
arrowprops={"arrowstyle":"->", "color":"gray"})

def plot_timeseries(axes, x, y, color, xlabel, ylabel):
    """Draw one colour-coded line series on ``axes``.

    The line, the y-axis label and the y-axis ticks all use ``color`` so
    that several series in one figure (for example on twin axes) can each
    be matched to their own y-axis.

    Args:
        axes: Object providing ``plot``, ``set_xlabel``, ``set_ylabel`` and
            ``tick_params`` (in this file, a Matplotlib Axes).
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        color: Colour applied to the line, the y-label and the y-ticks.
        xlabel: Text of the x-axis label.
        ylabel: Text of the y-axis label.
    """
    axes.plot(x, y, color=color)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel, color=color)
    # Only the y-axis ticks are recoloured; the x-axis is left untouched
    # because it may be shared with another series.
    axes.tick_params('y', colors=color)

# Two-axis figure built with plot_timeseries: the 'co2' column in blue on
# the first Axes, the 'relative_temp' column in red on a twin Axes that
# shares the x-axis.
fig, ax = plt.subplots()
plot_timeseries(ax, climate_change.index, climate_change['co2'],
'blue', 'Time', 'CO2 (ppm)')
ax2 = ax.twinx()
plot_timeseries(ax2, climate_change.index,
climate_change['relative_temp'],
'red', 'Time', 'Relative temperature (Celsius)')
plt.show()

# Load the medal table; the first CSV column becomes the row index.
# NOTE(review): presumably the index holds country names -- confirm against
# the CSV.
medals = pd.read_csv('medals_by_country_2016.csv', index_col=0)


# Bar chart of the "Gold" column, one bar per index entry.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
plt.show()
# Bar chart of the "Gold" column with vertical tick labels so long index
# entries do not overlap.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
# The bars already label the x-axis with the index values; only the
# rotation needs changing. Using tick_params avoids re-supplying the labels
# through set_xticklabels, which warns when the ticks are not fixed first.
ax.tick_params(axis="x", labelrotation=90)
ax.set_ylabel("Number of medals")
plt.show()

# Stacked bar chart: "Silver" bars start where "Gold" ends, and "Bronze"
# bars start at the combined height of "Gold" and "Silver".
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"])
ax.bar(medals.index, medals["Bronze"], bottom=medals["Gold"] + medals["Silver"])
# The bars already label the x-axis with the index values; only rotate them.
# tick_params avoids the set_xticklabels warning about non-fixed ticks.
ax.tick_params(axis="x", labelrotation=90)
ax.set_ylabel("Number of medals")
plt.show()

# Stacked bar chart with a legend: each layer gets a label so ax.legend()
# can list it.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"], label="Gold")
ax.bar(medals.index, medals["Silver"], bottom=medals["Gold"], label="Silver")
ax.bar(medals.index, medals["Bronze"], bottom=medals["Gold"] + medals["Silver"], label="Bronze")

# The bars already label the x-axis with the index values; only rotate them.
# tick_params avoids the set_xticklabels warning about non-fixed ticks.
ax.tick_params(axis="x", labelrotation=90)
ax.set_ylabel("Number of medals")
ax.legend()
plt.show()
# Overlay histograms of the "Height" column for the two groups on one Axes,
# using Matplotlib's default binning.
fig, ax = plt.subplots()
ax.hist(
    mens_rowing["Height"],
)
ax.hist(
    mens_gymnastics["Height"],
)
ax.set(xlabel="Height (cm)", ylabel="# of observations")
plt.show()

# Labelled histograms with five bins each.
# A fresh figure is created first: without it these calls would draw onto
# the Axes of the previous figure, which has already been shown and already
# contains the default-bin histograms.
fig, ax = plt.subplots()
ax.hist(mens_rowing["Height"], label="Rowing", bins=5)
ax.hist(mens_gymnastics["Height"], label="Gymnastics", bins=5)
ax.set_xlabel("Height (cm)")
ax.set_ylabel("# of observations")
# The label= values above are what the legend displays.
ax.legend()
plt.show()

# Scatter the "relative_temp" column against the "co2" column, one point per
# row of the table.
fig, ax = plt.subplots()
ax.scatter(
    climate_change["co2"],
    climate_change["relative_temp"],
)
ax.set(xlabel="CO2 (ppm)", ylabel="Relative temperature (Celsius)")
plt.show()

# Date-slice the table into the 1980s and the 1990s.
eighties = climate_change["1980-01-01":"1989-12-31"]
nineties = climate_change["1990-01-01":"1999-12-31"]

# Scatter each decade in its own colour; the label= values feed the legend.
fig, ax = plt.subplots()
ax.scatter(
    eighties["co2"],
    eighties["relative_temp"],
    color="red",
    label="eighties",
)
ax.scatter(
    nineties["co2"],
    nineties["relative_temp"],
    color="blue",
    label="nineties",
)
ax.set(xlabel="CO2 (ppm)", ylabel="Relative temperature (Celsius)")
ax.legend()
plt.show()

# One bar per group: bar height is the mean of "Height" and the error bar
# (yerr) is its standard deviation.
fig, ax = plt.subplots()
ax.bar(
    "Rowing",
    mens_rowing["Height"].mean(),
    yerr=mens_rowing["Height"].std(),
)
ax.bar(
    "Gymnastics",
    mens_gymnastics["Height"].mean(),
    yerr=mens_gymnastics["Height"].std(),
)
ax.set(ylabel="Height (cm)")
plt.show()

# Line plots with vertical error bars for both weather tables: y comes from
# the "MLY-TAVG-NORMAL" column and the error-bar size from the
# "MLY-TAVG-STDDEV" column, plotted against "MONTH".
fig, ax = plt.subplots()
ax.errorbar(
    seattle_weather["MONTH"],
    seattle_weather["MLY-TAVG-NORMAL"],
    yerr=seattle_weather["MLY-TAVG-STDDEV"],
)
ax.errorbar(
    austin_weather["MONTH"],
    austin_weather["MLY-TAVG-NORMAL"],
    yerr=austin_weather["MLY-TAVG-STDDEV"],
)
ax.set(ylabel="Temperature (Fahrenheit)")
plt.show()

# Side-by-side box plots of the "Height" column for the two groups; the
# tick labels are given in the same order as the data list.
fig, ax = plt.subplots()
ax.boxplot(
    [
        mens_rowing["Height"],
        mens_gymnastics["Height"],
    ]
)
ax.set_xticklabels(["Rowing", "Gymnastics"])
ax.set(ylabel="Height (cm)")
plt.show()

## Introduction to Data Visualization with Matplotlib

### Preparing Your Figures to Share

#### Changing Plot Style


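To change the plot style in Matplotlib, call `plt.style.use("<style name>")` before creating the figure. The first snippet below draws the plot with the default style for comparison; the snippets after it switch to other styles: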
```python
import matplotlib.pyplot as plt
# Baseline figure in the current style: the "MLY-TAVG-NORMAL" column of both
# weather tables plotted against "MONTH".
fig, ax = plt.subplots()
ax.plot(
    seattle_weather["MONTH"],
    seattle_weather["MLY-TAVG-NORMAL"],
)
ax.plot(
    austin_weather["MONTH"],
    austin_weather["MLY-TAVG-NORMAL"],
)
ax.set(
    xlabel="Time (months)",
    ylabel="Average temperature (Fahrenheit degrees)",
)
plt.show()

# plt.style.use(<style name>) switches the style sheet. The call requires a
# style argument, so the bare form from the walkthrough is kept only as a
# comment (executing it would raise TypeError).
# plt.style.use()

# Select the "ggplot" style, then redraw the two temperature series.
plt.style.use("ggplot")
fig, ax = plt.subplots()
ax.plot(seattle_weather["MONTH"], seattle_weather["MLY-TAVG-NORMAL"])
ax.plot(austin_weather["MONTH"], austin_weather["MLY-TAVG-NORMAL"])
ax.set_xlabel("Time (months)")
ax.set_ylabel("Average temperature (Fahrenheit degrees)")
plt.show()

# Return to Matplotlib's default style.
plt.style.use("default")

# Other style sheets shown in the walkthrough.
plt.style.use("bmh")
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names; use whichever name this
# installation provides.
plt.style.use(
    "seaborn-v0_8-colorblind"
    if "seaborn-v0_8-colorblind" in plt.style.available
    else "seaborn-colorblind"
)

# Bar chart of the "Gold" column; `fig` is kept so the figure can be saved
# to disk afterwards.
fig, ax = plt.subplots()
ax.bar(medals.index, medals["Gold"])
# The bars already label the x-axis with the index values; only rotate them.
# tick_params avoids the set_xticklabels warning about non-fixed ticks.
ax.tick_params(axis="x", labelrotation=90)
ax.set_ylabel("Number of medals")
plt.show()

# Write the figure to disk; the file extension selects the output format.
fig.savefig("gold_medals.png")

# Shell command from the walkthrough (not Python): list the working
# directory to confirm the file was written.
# ls

fig.savefig("gold_medals.jpg")
# JPEG quality is passed to the image writer through pil_kwargs; the former
# `quality=` keyword of savefig was removed from Matplotlib.
fig.savefig("gold_medals.jpg", pil_kwargs={"quality": 50})
fig.savefig("gold_medals.svg")

# Higher dots-per-inch gives a higher-resolution raster image.
fig.savefig("gold_medals.png", dpi=300)

# Figure size is given as [width, height] in inches: first wide...
fig.set_size_inches([5, 3])

# ...then tall.
fig.set_size_inches([3, 5])

# Distinct values of the "Sport" column, printed for inspection.
sports = summer_2016_medals["Sport"].unique()
print(sports)

# One bar per sport: bar height is the mean of "Height" among that sport's
# rows and the error bar is the standard deviation.
fig, ax = plt.subplots()
for sport in sports:
    sport_df = summer_2016_medals[summer_2016_medals["Sport"] == sport]
    ax.bar(sport, sport_df["Height"].mean(), yerr=sport_df["Height"].std())
ax.set_ylabel("Height (cm)")
# Each bar was added under its sport name, so the x-axis is already labelled
# in `sports` order; only the rotation needs changing. tick_params avoids
# the set_xticklabels warning about non-fixed ticks.
ax.tick_params(axis="x", labelrotation=90)
plt.show()

# Seaborn relational plot of "mpg" against "horsepower", with point colour
# mapped to "origin" and point size mapped to "weight" (sizes between 40 and
# 400), semi-transparent points and the "muted" palette.
# NOTE(review): neither `seaborn` nor the `mpg` table is imported or defined
# in the visible part of this file -- confirm where they come from.
seaborn.relplot(x="horsepower", y="mpg", hue="origin", size="weight",


sizes=(40, 400), alpha=.5, palette="muted",
height=6, data=mpg)
unique

sport

sport_d_f

bar
unique

unique_sports = df['Sport'].unique()
sport

sport_df sport

bar

sport

sport_df['Height'].mean()

sport_df['Height'].std()

You might also like