Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/publish-slides.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ jobs:
run: ./slides/generate_slides.sh regular
- name: Package files
run: |
mkdir public && \
mkdir -p public/media && \
cp slides/media/* public/media/ && \
cp slides/html/workshop.slides.html public/index.html;
- name: Upload artifact
uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0
Expand Down
42 changes: 21 additions & 21 deletions notebooks/1-getting_started_with_matplotlib.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1596,7 +1596,7 @@
],
"source": [
"stackoverflow_monthly.matplotlib.plot(\n",
" figsize=(8, 2), xlabel='creation date', ylabel='total questions', \n",
" figsize=(8, 2), xlabel='creation date', ylabel='total questions',\n",
" title='Matplotlib Questions per Month\\n(since the creation of Stack Overflow)'\n",
")"
]
Expand Down Expand Up @@ -1729,7 +1729,7 @@
"The `Figure` object is the container for all components of our visualization. It contains one or more `Axes` objects, which can be thought of as the (sub)plots, as well as other [*Artists*](https://matplotlib.org/stable/tutorials/intermediate/artists.html), which draw on the plot canvas (x-axis, y-axis, legend, lines, etc.). The following image from the Matplotlib documentation illustrates the different components of a figure:\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"40%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/figure_anatomy.svg\" alt=\"Matplotlib figure anatomy\" style=\"min-width: 400px\">\n",
" <img width=\"40%\" src=\"../slides/media/figure_anatomy.svg\" alt=\"Matplotlib figure anatomy\" style=\"min-width: 400px\">\n",
" <div><small><em><a href=\"https://matplotlib.org/stable/tutorials/introductory/quick_start.html#parts-of-a-figure\">Source</a></em></small></div>\n",
"</div>"
]
Expand Down Expand Up @@ -5705,7 +5705,7 @@
],
"source": [
"ax = stackoverflow_monthly.matplotlib.plot(\n",
" figsize=(8, 2), xlabel='creation date', ylabel='total questions', \n",
" figsize=(8, 2), xlabel='creation date', ylabel='total questions',\n",
" title='Matplotlib Questions per Month\\n(since the creation of Stack Overflow)'\n",
")\n",
"ax.set_ylim(0, None) # this can also be done with pandas\n",
Expand Down Expand Up @@ -21035,7 +21035,7 @@
"fig, ax = plt.subplots(figsize=(8, 2))\n",
"ax.plot(avgs.index, avgs.matplotlib)\n",
"ax.fill_between(\n",
" avgs.index, avgs.matplotlib - 2 * stds.matplotlib, \n",
" avgs.index, avgs.matplotlib - 2 * stds.matplotlib,\n",
" avgs.matplotlib + 2 * stds.matplotlib, alpha=0.25\n",
")"
]
Expand Down Expand Up @@ -33816,7 +33816,7 @@
"fig, ax = plt.subplots(figsize=(8, 2))\n",
"ax.plot(avgs.index, avgs.matplotlib)\n",
"ax.fill_between(\n",
" avgs.index, avgs.matplotlib - 2 * stds.matplotlib, \n",
" avgs.index, avgs.matplotlib - 2 * stds.matplotlib,\n",
" avgs.matplotlib + 2 * stds.matplotlib, alpha=0.25\n",
")\n",
"\n",
Expand Down Expand Up @@ -34590,7 +34590,7 @@
"fig, ax = plt.subplots(figsize=(9, 3))\n",
"ax.plot(\n",
" stackoverflow_monthly.index,\n",
" stackoverflow_monthly.matplotlib, \n",
" stackoverflow_monthly.matplotlib,\n",
" 'ok', label=None, alpha=0.5\n",
")"
]
Expand Down Expand Up @@ -35395,7 +35395,7 @@
"source": [
"fig, ax = plt.subplots(figsize=(9, 3))\n",
"ax.plot(\n",
" x_axis_dates, stackoverflow_monthly.matplotlib, \n",
" x_axis_dates, stackoverflow_monthly.matplotlib,\n",
" 'ok', label=None, alpha=0.5\n",
")"
]
Expand Down Expand Up @@ -44476,7 +44476,7 @@
"source": [
"fig, ax = plt.subplots(figsize=(12, 3))\n",
"ax.stackplot(\n",
" mdates.date2num(top_libraries_monthly.index), top_libraries_monthly.to_numpy().T, \n",
" mdates.date2num(top_libraries_monthly.index), top_libraries_monthly.to_numpy().T,\n",
" labels=top_libraries_monthly.columns\n",
")\n",
"ax.set(xlabel='', ylabel='tagged questions', title='Stack Overflow Questions per Month')\n",
Expand Down Expand Up @@ -44509,7 +44509,7 @@
" fig, ax = plt.subplots(figsize=(12, 3))\n",
" ax.stackplot(\n",
" mdates.date2num(data.index),\n",
" data.to_numpy().T, \n",
" data.to_numpy().T,\n",
" labels=data.columns\n",
" )\n",
" ax.set(\n",
Expand Down Expand Up @@ -47195,7 +47195,7 @@
" fig, ax = plt.subplots(figsize=(12, 3))\n",
" ax.stackplot(\n",
" mdates.date2num(data.index),\n",
" data.to_numpy().T, \n",
" data.to_numpy().T,\n",
" labels=data.columns\n",
" )\n",
" ax.set(\n",
Expand Down Expand Up @@ -49833,7 +49833,7 @@
"import datetime as dt\n",
"\n",
"ax = area_plot(top_libraries_monthly)\n",
" \n",
"\n",
"# mark when seaborn was created\n",
"seaborn_released = dt.date(2013, 10, 28)\n",
"ax.axvline(seaborn_released, ymax=0.6, color='gray', linestyle='dashed')\n",
Expand Down Expand Up @@ -55159,8 +55159,8 @@
")\n",
"middle = (seaborn_released - first_seaborn_qs) / 2 + first_seaborn_qs\n",
"ax.annotate(\n",
" 'posts retroactively\\ntagged \"seaborn\"', \n",
" xy=(mdates.date2num(middle), 3500), \n",
" 'posts retroactively\\ntagged \"seaborn\"',\n",
" xy=(mdates.date2num(middle), 3500),\n",
" va='top', ha='center'\n",
")"
]
Expand Down Expand Up @@ -55210,7 +55210,7 @@
" fig, ax = plt.subplots(figsize=(12, 3))\n",
" ax.stackplot(\n",
" mdates.date2num(data.index),\n",
" data.to_numpy().T, \n",
" data.to_numpy().T,\n",
" labels=data.columns\n",
" )\n",
" ax.set(\n",
Expand Down Expand Up @@ -63637,7 +63637,7 @@
"\n",
"total_qs = top_libraries_monthly.sum()\n",
"inset_ax.barh(\n",
" total_qs.index, total_qs.to_numpy(), \n",
" total_qs.index, total_qs.to_numpy(),\n",
" color=[colors[label] for label in total_qs.index]\n",
")\n",
"inset_ax.yaxis.set_inverted(True)\n",
Expand Down Expand Up @@ -68368,7 +68368,7 @@
" co_occurring_library = data[library]\n",
" ax.barh(libraries, co_occurring_library, label=library, left=last)\n",
" last += co_occurring_library\n",
" \n",
"\n",
" ax.yaxis.set_inverted(True)\n",
" return despine(ax)"
]
Expand Down Expand Up @@ -73752,7 +73752,7 @@
" co_occurring_library = data[library]\n",
" ax.barh(libraries, co_occurring_library, label=library, left=last)\n",
" last += co_occurring_library\n",
" \n",
"\n",
" ax.yaxis.set_inverted(True)\n",
" ax.legend(bbox_to_anchor=(1.35, 0.5), loc='center right', framealpha=0.5)\n",
" ax.set(xlabel='percentage of questions with co-occurrences', xlim=(0, 1))\n",
Expand Down Expand Up @@ -76160,11 +76160,11 @@
" for i, library in enumerate(libraries):\n",
" co_occurring_library = data[library]\n",
" ax.barh(\n",
" libraries, co_occurring_library, \n",
" libraries, co_occurring_library,\n",
" label=library, left=last, color=cmap(i)\n",
" )\n",
" last += co_occurring_library\n",
" \n",
"\n",
" ax.yaxis.set_inverted(True)\n",
" ax.legend(bbox_to_anchor=(1.35, 0.5), loc='center right', framealpha=0.5)\n",
" ax.set(xlabel='percentage of questions with co-occurrences', xlim=(0, 1))\n",
Expand Down Expand Up @@ -78628,7 +78628,7 @@
],
"source": [
"subway = pd.read_csv(\n",
" '../data/NYC_subway_daily.csv', parse_dates=['Datetime'], \n",
" '../data/NYC_subway_daily.csv', parse_dates=['Datetime'],\n",
" index_col=['Borough', 'Datetime']\n",
")\n",
"subway_daily = subway.unstack(0)\n",
Expand Down Expand Up @@ -84537,7 +84537,7 @@
"\n",
" axes[0].set_ylabel('Frequency')\n",
" fig.suptitle('Histogram of Daily Subway Entries in Manhattan')\n",
" \n",
"\n",
" return fig, axes"
]
},
Expand Down
18 changes: 9 additions & 9 deletions notebooks/2-animations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
"We made a bar plot that captured the total number of questions per library, but it couldn't show us the growth in pandas questions over time (or how the growth rate changed over time):\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"60%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/bar_plot.svg\" alt=\"bar plot\" style=\"min-width: 500px\">\n",
" <img width=\"60%\" src=\"../slides/media/bar_plot.svg\" alt=\"bar plot\" style=\"min-width: 500px\">\n",
"</div>"
]
},
Expand All @@ -77,7 +77,7 @@
"We also made an area plot showing the number of questions per day over time for the top 4 libraries, but by limiting the libraries shown we lost some information:\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"90%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/area_plot.svg\" alt=\"area plot\" style=\"min-width: 600px\">\n",
" <img width=\"90%\" src=\"../slides/media/area_plot.svg\" alt=\"area plot\" style=\"min-width: 600px\">\n",
"</div>"
]
},
Expand Down Expand Up @@ -373,7 +373,7 @@
" fig, ax = plt.subplots(figsize=(6, 4), layout='constrained')\n",
" sort_order = data.loc[data.index.max()].sort_values().index\n",
" bars = ax.barh(sort_order, [0] * data.shape[1], label=sort_order)\n",
" \n",
"\n",
" ax.set_xlabel('total questions', fontweight='bold')\n",
" ax.set_xlim(0, 250_000)\n",
" ax.xaxis.set_major_formatter(ticker.EngFormatter())\n",
Expand Down Expand Up @@ -1797,7 +1797,7 @@
"source": [
"def update(frame, *, ax, df, annotations, time_text):\n",
" data = df.loc[frame, :]\n",
" \n",
"\n",
" # update bars\n",
" for rect, text in zip(ax.patches, annotations):\n",
" col = rect.get_label()\n",
Expand Down Expand Up @@ -1883,7 +1883,7 @@
" fig, update_func, frames=questions_per_library.index, repeat=False\n",
")\n",
"ani.save(\n",
" '../media/stackoverflow_questions.mp4', \n",
" '../media/stackoverflow_questions.mp4',\n",
" writer='ffmpeg', fps=10, bitrate=100, dpi=300\n",
")\n",
"plt.close()"
Expand Down Expand Up @@ -1957,7 +1957,7 @@
"As with the previous example, the histograms of daily Manhattan subway entries in 2018 (from the first section of the workshop) don't tell the whole story of the dataset because the distributions changed drastically in 2020 and 2021:\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"70%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/2018_subway_entries_histogram.svg\" alt=\"Histograms of daily Manhattan subway entries in 2018\" style=\"min-width: 500px\">\n",
" <img width=\"70%\" src=\"../slides/media/2018_subway_entries_histogram.svg\" alt=\"Histograms of daily Manhattan subway entries in 2018\" style=\"min-width: 500px\">\n",
"</div>"
]
},
Expand Down Expand Up @@ -2153,7 +2153,7 @@
],
"source": [
"subway = pd.read_csv(\n",
" '../data/NYC_subway_daily.csv', parse_dates=['Datetime'], \n",
" '../data/NYC_subway_daily.csv', parse_dates=['Datetime'],\n",
" index_col=['Borough', 'Datetime']\n",
")\n",
"subway_daily = subway.unstack(0)\n",
Expand Down Expand Up @@ -2249,7 +2249,7 @@
" {'label': 'Weekend', 'mask': ~weekday_mask, 'ymax': 60},\n",
" {'label': 'Weekday', 'mask': weekday_mask, 'ymax': 120}\n",
" ]\n",
" \n",
"\n",
" fig, axes = plt.subplots(1, 2, figsize=(6, 3), sharex=True, layout='constrained')\n",
" for ax, config in zip(axes, configs):\n",
" _, _, config['hist'] = ax.hist(\n",
Expand Down Expand Up @@ -4396,7 +4396,7 @@
"metadata": {},
"outputs": [],
"source": [
"# "
"#"
]
},
{
Expand Down
28 changes: 14 additions & 14 deletions notebooks/3-interactivity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1240,7 +1240,7 @@
"<p>The interactivity works best in a notebook environment &ndash; here's an example of the slider, tooltips, and zoom/pan functionality in action:</p>\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"50%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/interactive_map.gif\" style=\"border: 5px solid #555; min-width: 600px;\">\n",
" <img width=\"50%\" src=\"../slides/media/interactive_map.gif\" style=\"border: 5px solid #555; min-width: 600px;\">\n",
"</div>"
]
},
Expand Down Expand Up @@ -2227,7 +2227,7 @@
"<p>The result can be interacted with after displaying it, but this kind of interactivity only works in the notebook. Here's an example:</p>\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"60%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/linked_plots.gif\" style=\"border: 5px solid #555; min-width: 800px; margin: 0px auto;\">\n",
" <img width=\"60%\" src=\"../slides/media/linked_plots.gif\" style=\"border: 5px solid #555; min-width: 800px; margin: 0px auto;\">\n",
" <br/>\n",
" <small><em>This example uses tabs to display the layout rather than side by side like we did. Tabs are a little buggy in this version, so we didn't use them.</em></small>\n",
"</div>"
Expand Down Expand Up @@ -2412,7 +2412,7 @@
},
"source": [
"<div style=\"text-align: center;\">\n",
" <img width=\"75%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/sankey_and_chord.png\" style=\"min-width: 750px\">\n",
" <img width=\"75%\" src=\"../slides/media/sankey_and_chord.png\" style=\"min-width: 750px\">\n",
"</div>"
]
},
Expand Down Expand Up @@ -2443,12 +2443,12 @@
"flight_stats = pd.read_csv(\n",
" '../data/T100_MARKET_ALL_CARRIER.zip',\n",
" usecols=[\n",
" 'CLASS', 'REGION', 'UNIQUE_CARRIER_NAME', 'ORIGIN_CITY_NAME', 'ORIGIN', \n",
" 'CLASS', 'REGION', 'UNIQUE_CARRIER_NAME', 'ORIGIN_CITY_NAME', 'ORIGIN',\n",
" 'DEST_CITY_NAME', 'DEST', 'PASSENGERS', 'FREIGHT', 'MAIL'\n",
" ]\n",
").rename(lambda x: x.lower(), axis=1).assign(\n",
" region=lambda x: x.region.replace({\n",
" 'D': 'Domestic', 'I': 'International', 'A': 'Atlantic', \n",
" 'D': 'Domestic', 'I': 'International', 'A': 'Atlantic',\n",
" 'L': 'Latin America', 'P': 'Pacific', 'S': 'System'\n",
" }),\n",
" route=lambda x: np.where(\n",
Expand Down Expand Up @@ -2653,7 +2653,7 @@
"source": [
"cities = [\n",
" 'Atlanta, GA', 'Chicago, IL', 'New York, NY', 'Los Angeles, CA',\n",
" 'Dallas/Fort Worth, TX', 'Denver, CO', 'Houston, TX', \n",
" 'Dallas/Fort Worth, TX', 'Denver, CO', 'Houston, TX',\n",
" 'San Francisco, CA', 'Seattle, WA', 'Orlando, FL'\n",
"]\n",
"\n",
Expand All @@ -2678,7 +2678,7 @@
"A **chord diagram** is a way of showing many-to-many relationships between a set of entities called **nodes**: the nodes are arranged in a circle, and chords (which can be thought of as **edges**) are drawn between those that are connected, with the width of the chord encoding the strength of the connection. In this section, we will be making a chord diagram for total passenger service travel between the top 10 cities in 2019:\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"35%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/chord.png\" style=\"border: 5px solid #555; min-width: 350px\">\n",
" <img width=\"35%\" src=\"../slides/media/chord.png\" style=\"border: 5px solid #555; min-width: 350px\">\n",
"</div>"
]
},
Expand Down Expand Up @@ -2948,7 +2948,7 @@
"source": [
"chord = hv.Chord(\n",
" total_flight_stats,\n",
" kdims=['origin', 'dest'], \n",
" kdims=['origin', 'dest'],\n",
" vdims=['passengers', 'origin_city_name', 'dest_city_name', 'mail', 'freight']\n",
")"
]
Expand Down Expand Up @@ -3162,7 +3162,7 @@
"<p>The result can be interacted with after displaying it, but it works best in the notebook &ndash; the GIF below shows some example interactions. Note that for this visualization the interactivity is what makes it useful:</p>\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"50%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/chord.gif\" style=\"border: 5px solid #555; min-width: 500px;\">\n",
" <img width=\"50%\" src=\"../slides/media/chord.gif\" style=\"border: 5px solid #555; min-width: 500px;\">\n",
"</div>"
]
},
Expand All @@ -3181,7 +3181,7 @@
"For our final visualization, we will create a **Sankey plot**, which is a way to visualize flow as edges between nodes. Here, we will use it to analyze airline market share for passenger service flights between the top 5 US cities:\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"50%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/sankey.png\" style=\"border: 5px solid #555; min-width: 600px;\">\n",
" <img width=\"50%\" src=\"../slides/media/sankey.png\" style=\"border: 5px solid #555; min-width: 600px;\">\n",
"</div>"
]
},
Expand Down Expand Up @@ -3605,7 +3605,7 @@
],
"source": [
"carrier_edges = get_edges(\n",
" domestic_passenger_travel, \n",
" domestic_passenger_travel,\n",
" source_col='region',\n",
" target_col='unique_carrier_name'\n",
").replace('^Domestic$', 'Top Routes', regex=True)\n",
Expand Down Expand Up @@ -3806,11 +3806,11 @@
"outputs": [],
"source": [
"sankey = hv.Sankey(\n",
" all_edges, \n",
" all_edges,\n",
" kdims=['source', 'target'],\n",
" vdims=hv.Dimension('passengers', unit='M')\n",
").opts(\n",
" labels='index', label_position='right', cmap='Set1', # node config \n",
" labels='index', label_position='right', cmap='Set1', # node config\n",
" edge_color='lightgray', # edge config\n",
" width=750, height=600, # plot size config\n",
" title='Travel Between the Top 5 Cities in 2019'\n",
Expand Down Expand Up @@ -3951,7 +3951,7 @@
"<p>The resulting visualization can be interacted with after displaying it, but it works best in the notebook. Here's an example:</p>\n",
"\n",
"<div style=\"text-align: center;\">\n",
" <img width=\"50%\" src=\"https://raw.githubusercontent.com/stefmolin/python-data-viz-workshop/main/media/sankey.gif\" style=\"border: 5px solid #555; min-width: 700px;\">\n",
" <img width=\"50%\" src=\"../slides/media/sankey.gif\" style=\"border: 5px solid #555; min-width: 700px;\">\n",
"</div>"
]
},
Expand Down
Loading
Loading