diff --git a/.github/workflows/publish-slides.yml b/.github/workflows/publish-slides.yml index 1425d36..05f6004 100644 --- a/.github/workflows/publish-slides.yml +++ b/.github/workflows/publish-slides.yml @@ -34,7 +34,8 @@ jobs: run: ./slides/generate_slides.sh regular - name: Package files run: | - mkdir public && \ + mkdir -p public/media && \ + cp slides/media/* public/media/ && \ cp slides/html/workshop.slides.html public/index.html; - name: Upload artifact uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0 diff --git a/notebooks/1-getting_started_with_matplotlib.ipynb b/notebooks/1-getting_started_with_matplotlib.ipynb index 2a13ec1..80ed103 100644 --- a/notebooks/1-getting_started_with_matplotlib.ipynb +++ b/notebooks/1-getting_started_with_matplotlib.ipynb @@ -1596,7 +1596,7 @@ ], "source": [ "stackoverflow_monthly.matplotlib.plot(\n", - " figsize=(8, 2), xlabel='creation date', ylabel='total questions', \n", + " figsize=(8, 2), xlabel='creation date', ylabel='total questions',\n", " title='Matplotlib Questions per Month\\n(since the creation of Stack Overflow)'\n", ")" ] @@ -1729,7 +1729,7 @@ "The `Figure` object is the container for all components of our visualization. It contains one or more `Axes` objects, which can be thought of as the (sub)plots, as well as other [*Artists*](https://matplotlib.org/stable/tutorials/intermediate/artists.html), which draw on the plot canvas (x-axis, y-axis, legend, lines, etc.). The following image from the Matplotlib documentation illustrates the different components of a figure:\n", "\n", "
\n", - " \"Matplotlib\n", + " \"Matplotlib\n", "
Source
\n", "
" ] @@ -5705,7 +5705,7 @@ ], "source": [ "ax = stackoverflow_monthly.matplotlib.plot(\n", - " figsize=(8, 2), xlabel='creation date', ylabel='total questions', \n", + " figsize=(8, 2), xlabel='creation date', ylabel='total questions',\n", " title='Matplotlib Questions per Month\\n(since the creation of Stack Overflow)'\n", ")\n", "ax.set_ylim(0, None) # this can also be done with pandas\n", @@ -21035,7 +21035,7 @@ "fig, ax = plt.subplots(figsize=(8, 2))\n", "ax.plot(avgs.index, avgs.matplotlib)\n", "ax.fill_between(\n", - " avgs.index, avgs.matplotlib - 2 * stds.matplotlib, \n", + " avgs.index, avgs.matplotlib - 2 * stds.matplotlib,\n", " avgs.matplotlib + 2 * stds.matplotlib, alpha=0.25\n", ")" ] @@ -33816,7 +33816,7 @@ "fig, ax = plt.subplots(figsize=(8, 2))\n", "ax.plot(avgs.index, avgs.matplotlib)\n", "ax.fill_between(\n", - " avgs.index, avgs.matplotlib - 2 * stds.matplotlib, \n", + " avgs.index, avgs.matplotlib - 2 * stds.matplotlib,\n", " avgs.matplotlib + 2 * stds.matplotlib, alpha=0.25\n", ")\n", "\n", @@ -34590,7 +34590,7 @@ "fig, ax = plt.subplots(figsize=(9, 3))\n", "ax.plot(\n", " stackoverflow_monthly.index,\n", - " stackoverflow_monthly.matplotlib, \n", + " stackoverflow_monthly.matplotlib,\n", " 'ok', label=None, alpha=0.5\n", ")" ] @@ -35395,7 +35395,7 @@ "source": [ "fig, ax = plt.subplots(figsize=(9, 3))\n", "ax.plot(\n", - " x_axis_dates, stackoverflow_monthly.matplotlib, \n", + " x_axis_dates, stackoverflow_monthly.matplotlib,\n", " 'ok', label=None, alpha=0.5\n", ")" ] @@ -44476,7 +44476,7 @@ "source": [ "fig, ax = plt.subplots(figsize=(12, 3))\n", "ax.stackplot(\n", - " mdates.date2num(top_libraries_monthly.index), top_libraries_monthly.to_numpy().T, \n", + " mdates.date2num(top_libraries_monthly.index), top_libraries_monthly.to_numpy().T,\n", " labels=top_libraries_monthly.columns\n", ")\n", "ax.set(xlabel='', ylabel='tagged questions', title='Stack Overflow Questions per Month')\n", @@ -44509,7 +44509,7 @@ " fig, ax = plt.subplots(figsize=(12, 3))\n", " ax.stackplot(\n", " mdates.date2num(data.index),\n", - " data.to_numpy().T, \n", + " data.to_numpy().T,\n", " labels=data.columns\n", " )\n", " ax.set(\n", @@ -47195,7 +47195,7 @@ " fig, ax = plt.subplots(figsize=(12, 3))\n", " ax.stackplot(\n", " mdates.date2num(data.index),\n", - " data.to_numpy().T, \n", + " data.to_numpy().T,\n", " labels=data.columns\n", " )\n", " ax.set(\n", @@ -49833,7 +49833,7 @@ "import datetime as dt\n", "\n", "ax = area_plot(top_libraries_monthly)\n", - " \n", + "\n", "# mark when seaborn was created\n", "seaborn_released = dt.date(2013, 10, 28)\n", "ax.axvline(seaborn_released, ymax=0.6, color='gray', linestyle='dashed')\n", @@ -55159,8 +55159,8 @@ ")\n", "middle = (seaborn_released - first_seaborn_qs) / 2 + first_seaborn_qs\n", "ax.annotate(\n", - " 'posts retroactively\\ntagged \"seaborn\"', \n", - " xy=(mdates.date2num(middle), 3500), \n", + " 'posts retroactively\\ntagged \"seaborn\"',\n", + " xy=(mdates.date2num(middle), 3500),\n", " va='top', ha='center'\n", ")" ] @@ -55210,7 +55210,7 @@ " fig, ax = plt.subplots(figsize=(12, 3))\n", " ax.stackplot(\n", " mdates.date2num(data.index),\n", - " data.to_numpy().T, \n", + " data.to_numpy().T,\n", " labels=data.columns\n", " )\n", " ax.set(\n", @@ -63637,7 +63637,7 @@ "\n", "total_qs = top_libraries_monthly.sum()\n", "inset_ax.barh(\n", - " total_qs.index, total_qs.to_numpy(), \n", + " total_qs.index, total_qs.to_numpy(),\n", " color=[colors[label] for label in total_qs.index]\n", ")\n", "inset_ax.yaxis.set_inverted(True)\n", @@ -68368,7 +68368,7 @@ " co_occurring_library = data[library]\n", " ax.barh(libraries, co_occurring_library, label=library, left=last)\n", " last += co_occurring_library\n", - " \n", + "\n", " ax.yaxis.set_inverted(True)\n", " return despine(ax)" ] @@ -73752,7 +73752,7 @@ " co_occurring_library = data[library]\n", " ax.barh(libraries, co_occurring_library, label=library, left=last)\n", " last += co_occurring_library\n", - " \n", + "\n", " ax.yaxis.set_inverted(True)\n", " ax.legend(bbox_to_anchor=(1.35, 0.5), loc='center right', framealpha=0.5)\n", " ax.set(xlabel='percentage of questions with co-occurrences', xlim=(0, 1))\n", @@ -76160,11 +76160,11 @@ " for i, library in enumerate(libraries):\n", " co_occurring_library = data[library]\n", " ax.barh(\n", - " libraries, co_occurring_library, \n", + " libraries, co_occurring_library,\n", " label=library, left=last, color=cmap(i)\n", " )\n", " last += co_occurring_library\n", - " \n", + "\n", " ax.yaxis.set_inverted(True)\n", " ax.legend(bbox_to_anchor=(1.35, 0.5), loc='center right', framealpha=0.5)\n", " ax.set(xlabel='percentage of questions with co-occurrences', xlim=(0, 1))\n", @@ -78628,7 +78628,7 @@ ], "source": [ "subway = pd.read_csv(\n", - " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'], \n", + " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'],\n", " index_col=['Borough', 'Datetime']\n", ")\n", "subway_daily = subway.unstack(0)\n", @@ -84537,7 +84537,7 @@ "\n", " axes[0].set_ylabel('Frequency')\n", " fig.suptitle('Histogram of Daily Subway Entries in Manhattan')\n", - " \n", + "\n", " return fig, axes" ] }, diff --git a/notebooks/2-animations.ipynb b/notebooks/2-animations.ipynb index 7c8e2ab..1f0a9cf 100644 --- a/notebooks/2-animations.ipynb +++ b/notebooks/2-animations.ipynb @@ -60,7 +60,7 @@ "We made a bar plot that captured the total number of questions per library, but it couldn't show us the growth in pandas questions over time (or how the growth rate changed over time):\n", "\n", "
\n", - " \"bar\n", + " \"bar\n", "
" ] }, @@ -77,7 +77,7 @@ "We also made an area plot showing the number of questions per day over time for the top 4 libraries, but by limiting the libraries shown we lost some information:\n", "\n", "
\n", - " \"area\n", + " \"area\n", "
" ] }, @@ -373,7 +373,7 @@ " fig, ax = plt.subplots(figsize=(6, 4), layout='constrained')\n", " sort_order = data.loc[data.index.max()].sort_values().index\n", " bars = ax.barh(sort_order, [0] * data.shape[1], label=sort_order)\n", - " \n", + "\n", " ax.set_xlabel('total questions', fontweight='bold')\n", " ax.set_xlim(0, 250_000)\n", " ax.xaxis.set_major_formatter(ticker.EngFormatter())\n", @@ -1797,7 +1797,7 @@ "source": [ "def update(frame, *, ax, df, annotations, time_text):\n", " data = df.loc[frame, :]\n", - " \n", + "\n", " # update bars\n", " for rect, text in zip(ax.patches, annotations):\n", " col = rect.get_label()\n", @@ -1883,7 +1883,7 @@ " fig, update_func, frames=questions_per_library.index, repeat=False\n", ")\n", "ani.save(\n", - " '../media/stackoverflow_questions.mp4', \n", + " '../media/stackoverflow_questions.mp4',\n", " writer='ffmpeg', fps=10, bitrate=100, dpi=300\n", ")\n", "plt.close()" @@ -1957,7 +1957,7 @@ "As with the previous example, the histograms of daily Manhattan subway entries in 2018 (from the first section of the workshop) don't tell the whole story of the dataset because the distributions changed drastically in 2020 and 2021:\n", "\n", "
\n", - " \"Histograms\n", + " \"Histograms\n", "
" ] }, @@ -2153,7 +2153,7 @@ ], "source": [ "subway = pd.read_csv(\n", - " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'], \n", + " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'],\n", " index_col=['Borough', 'Datetime']\n", ")\n", "subway_daily = subway.unstack(0)\n", @@ -2249,7 +2249,7 @@ " {'label': 'Weekend', 'mask': ~weekday_mask, 'ymax': 60},\n", " {'label': 'Weekday', 'mask': weekday_mask, 'ymax': 120}\n", " ]\n", - " \n", + "\n", " fig, axes = plt.subplots(1, 2, figsize=(6, 3), sharex=True, layout='constrained')\n", " for ax, config in zip(axes, configs):\n", " _, _, config['hist'] = ax.hist(\n", @@ -4396,7 +4396,7 @@ "metadata": {}, "outputs": [], "source": [ - "# " + "#" ] }, { diff --git a/notebooks/3-interactivity.ipynb b/notebooks/3-interactivity.ipynb index fe64c38..98dca1c 100644 --- a/notebooks/3-interactivity.ipynb +++ b/notebooks/3-interactivity.ipynb @@ -1240,7 +1240,7 @@ "

The interactivity works best in a notebook environment – here's an example of the slider, tooltips, and zoom/pan functionality in action:

\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -2227,7 +2227,7 @@ "

The result can be interacted with after displaying it, but this kind of interactivity only works in the notebook. Here's an example:

\n", "\n", "
\n", - " \n", + " \n", "
\n", " This example uses tabs to display the layout rather than side by side like we did. Tabs are a little buggy in this version, so we didn't use them.\n", "
" @@ -2412,7 +2412,7 @@ }, "source": [ "
\n", - " \n", + " \n", "
" ] }, @@ -2443,12 +2443,12 @@ "flight_stats = pd.read_csv(\n", " '../data/T100_MARKET_ALL_CARRIER.zip',\n", " usecols=[\n", - " 'CLASS', 'REGION', 'UNIQUE_CARRIER_NAME', 'ORIGIN_CITY_NAME', 'ORIGIN', \n", + " 'CLASS', 'REGION', 'UNIQUE_CARRIER_NAME', 'ORIGIN_CITY_NAME', 'ORIGIN',\n", " 'DEST_CITY_NAME', 'DEST', 'PASSENGERS', 'FREIGHT', 'MAIL'\n", " ]\n", ").rename(lambda x: x.lower(), axis=1).assign(\n", " region=lambda x: x.region.replace({\n", - " 'D': 'Domestic', 'I': 'International', 'A': 'Atlantic', \n", + " 'D': 'Domestic', 'I': 'International', 'A': 'Atlantic',\n", " 'L': 'Latin America', 'P': 'Pacific', 'S': 'System'\n", " }),\n", " route=lambda x: np.where(\n", @@ -2653,7 +2653,7 @@ "source": [ "cities = [\n", " 'Atlanta, GA', 'Chicago, IL', 'New York, NY', 'Los Angeles, CA',\n", - " 'Dallas/Fort Worth, TX', 'Denver, CO', 'Houston, TX', \n", + " 'Dallas/Fort Worth, TX', 'Denver, CO', 'Houston, TX',\n", " 'San Francisco, CA', 'Seattle, WA', 'Orlando, FL'\n", "]\n", "\n", @@ -2678,7 +2678,7 @@ "A **chord diagram** is a way of showing many-to-many relationships between a set of entities called **nodes**: the nodes are arranged in a circle, and chords (which can be thought of as **edges**) are drawn between those that are connected, with the width of the chord encoding the strength of the connection. In this section, we will be making a chord diagram for total passenger service travel between the top 10 cities in 2019:\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -2948,7 +2948,7 @@ "source": [ "chord = hv.Chord(\n", " total_flight_stats,\n", - " kdims=['origin', 'dest'], \n", + " kdims=['origin', 'dest'],\n", " vdims=['passengers', 'origin_city_name', 'dest_city_name', 'mail', 'freight']\n", ")" ] @@ -3162,7 +3162,7 @@ "

The result can be interacted with after displaying it, but it works best in the notebook – the GIF below shows some example interactions. Note that for this visualization the interactivity is what makes it useful:

\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -3181,7 +3181,7 @@ "For our final visualization, we will create a **Sankey plot**, which is a way to visualize flow as edges between nodes. Here, we will use it to analyze airline market share for passenger service flights between the top 5 US cities:\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -3605,7 +3605,7 @@ ], "source": [ "carrier_edges = get_edges(\n", - " domestic_passenger_travel, \n", + " domestic_passenger_travel,\n", " source_col='region',\n", " target_col='unique_carrier_name'\n", ").replace('^Domestic$', 'Top Routes', regex=True)\n", @@ -3806,11 +3806,11 @@ "outputs": [], "source": [ "sankey = hv.Sankey(\n", - " all_edges, \n", + " all_edges,\n", " kdims=['source', 'target'],\n", " vdims=hv.Dimension('passengers', unit='M')\n", ").opts(\n", - " labels='index', label_position='right', cmap='Set1', # node config \n", + " labels='index', label_position='right', cmap='Set1', # node config\n", " edge_color='lightgray', # edge config\n", " width=750, height=600, # plot size config\n", " title='Travel Between the Top 5 Cities in 2019'\n", @@ -3951,7 +3951,7 @@ "

The resulting visualization can be interacted with after displaying it, but it works best in the notebook. Here's an example:

\n", "\n", "
\n", - " \n", + " \n", "
" ] }, diff --git a/slides/1-getting_started_with_matplotlib.ipynb b/slides/1-getting_started_with_matplotlib.ipynb index 97a9349..1938019 100644 --- a/slides/1-getting_started_with_matplotlib.ipynb +++ b/slides/1-getting_started_with_matplotlib.ipynb @@ -1620,7 +1620,7 @@ ], "source": [ "stackoverflow_monthly.matplotlib.plot(\n", - " figsize=(8, 2), xlabel='creation date', ylabel='total questions', \n", + " figsize=(8, 2), xlabel='creation date', ylabel='total questions',\n", " title='Matplotlib Questions per Month\\n(since the creation of Stack Overflow)'\n", ")" ] @@ -1774,7 +1774,7 @@ "The `Figure` object is the container for all components of our visualization. It contains one or more `Axes` objects, which can be thought of as the (sub)plots, as well as other [*Artists*](https://matplotlib.org/stable/tutorials/intermediate/artists.html), which draw on the plot canvas (x-axis, y-axis, legend, lines, etc.). The following image from the Matplotlib documentation illustrates the different components of a figure:\n", "\n", "
\n", - " \"Matplotlib\n", + " \"Matplotlib\n", "
Source
\n", "
" ] @@ -5750,7 +5750,7 @@ ], "source": [ "ax = stackoverflow_monthly.matplotlib.plot(\n", - " figsize=(8, 2), xlabel='creation date', ylabel='total questions', \n", + " figsize=(8, 2), xlabel='creation date', ylabel='total questions',\n", " title='Matplotlib Questions per Month\\n(since the creation of Stack Overflow)'\n", ")\n", "ax.set_ylim(0, None) # this can also be done with pandas\n", @@ -21101,7 +21101,7 @@ "fig, ax = plt.subplots(figsize=(8, 2))\n", "ax.plot(avgs.index, avgs.matplotlib)\n", "ax.fill_between(\n", - " avgs.index, avgs.matplotlib - 2 * stds.matplotlib, \n", + " avgs.index, avgs.matplotlib - 2 * stds.matplotlib,\n", " avgs.matplotlib + 2 * stds.matplotlib, alpha=0.25\n", ")" ] @@ -33882,7 +33882,7 @@ "fig, ax = plt.subplots(figsize=(8, 2))\n", "ax.plot(avgs.index, avgs.matplotlib)\n", "ax.fill_between(\n", - " avgs.index, avgs.matplotlib - 2 * stds.matplotlib, \n", + " avgs.index, avgs.matplotlib - 2 * stds.matplotlib,\n", " avgs.matplotlib + 2 * stds.matplotlib, alpha=0.25\n", ")\n", "\n", @@ -34658,7 +34658,7 @@ "fig, ax = plt.subplots(figsize=(9, 3))\n", "ax.plot(\n", " stackoverflow_monthly.index,\n", - " stackoverflow_monthly.matplotlib, \n", + " stackoverflow_monthly.matplotlib,\n", " 'ok', label=None, alpha=0.5\n", ")" ] @@ -35463,7 +35463,7 @@ "source": [ "fig, ax = plt.subplots(figsize=(9, 3))\n", "ax.plot(\n", - " x_axis_dates, stackoverflow_monthly.matplotlib, \n", + " x_axis_dates, stackoverflow_monthly.matplotlib,\n", " 'ok', label=None, alpha=0.5\n", ")" ] @@ -40384,7 +40384,7 @@ " ax.xaxis.set_major_formatter(mdates.DateFormatter('%b\\n%Y'))\n", "\n", " ax.fill_between(\n", - " la_tavg.index, la_tavg, nyc_tavg, where=nyc_tavg > la_tavg, \n", + " la_tavg.index, la_tavg, nyc_tavg, where=nyc_tavg > la_tavg,\n", " hatch='///', facecolor='gray', alpha=0.5, label='NYC hotter than LA'\n", " )\n", " ax.legend(ncols=3, loc='lower center')\n", @@ -47037,7 +47037,7 @@ "source": [ "fig, ax = plt.subplots(figsize=(12, 3))\n", "ax.stackplot(\n", - " mdates.date2num(top_libraries_monthly.index), top_libraries_monthly.to_numpy().T, \n", + " mdates.date2num(top_libraries_monthly.index), top_libraries_monthly.to_numpy().T,\n", " labels=top_libraries_monthly.columns\n", ")\n", "ax.set(xlabel='', ylabel='tagged questions', title='Stack Overflow Questions per Month')\n", @@ -47070,7 +47070,7 @@ " fig, ax = plt.subplots(figsize=(12, 3))\n", " ax.stackplot(\n", " mdates.date2num(data.index),\n", - " data.to_numpy().T, \n", + " data.to_numpy().T,\n", " labels=data.columns\n", " )\n", " ax.set(\n", @@ -49756,7 +49756,7 @@ " fig, ax = plt.subplots(figsize=(12, 3))\n", " ax.stackplot(\n", " mdates.date2num(data.index),\n", - " data.to_numpy().T, \n", + " data.to_numpy().T,\n", " labels=data.columns\n", " )\n", " ax.set(\n", @@ -52394,7 +52394,7 @@ "import datetime as dt\n", "\n", "ax = area_plot(top_libraries_monthly)\n", - " \n", + "\n", "# mark when seaborn was created\n", "seaborn_released = dt.date(2013, 10, 28)\n", "ax.axvline(seaborn_released, ymax=0.6, color='gray', linestyle='dashed')\n", @@ -57720,8 +57720,8 @@ ")\n", "middle = (seaborn_released - first_seaborn_qs) / 2 + first_seaborn_qs\n", "ax.annotate(\n", - " 'posts retroactively\\ntagged \"seaborn\"', \n", - " xy=(mdates.date2num(middle), 3500), \n", + " 'posts retroactively\\ntagged \"seaborn\"',\n", + " xy=(mdates.date2num(middle), 3500),\n", " va='top', ha='center'\n", ")" ] @@ -57773,7 +57773,7 @@ " fig, ax = plt.subplots(figsize=(12, 3))\n", " ax.stackplot(\n", " mdates.date2num(data.index),\n", - " data.to_numpy().T, \n", + " data.to_numpy().T,\n", " labels=data.columns\n", " )\n", " ax.set(\n", @@ -66200,7 +66200,7 @@ "\n", "total_qs = top_libraries_monthly.sum()\n", "inset_ax.barh(\n", - " total_qs.index, total_qs.to_numpy(), \n", + " total_qs.index, total_qs.to_numpy(),\n", " color=[colors[label] for label in total_qs.index]\n", ")\n", "inset_ax.yaxis.set_inverted(True)\n", @@ -72555,7 +72555,7 @@ " co_occurring_library = data[library]\n", " ax.barh(libraries, co_occurring_library, label=library, left=last)\n", " last += co_occurring_library\n", - " \n", + "\n", " ax.yaxis.set_inverted(True)\n", " return despine(ax)" ] @@ -77939,7 +77939,7 @@ " co_occurring_library = data[library]\n", " ax.barh(libraries, co_occurring_library, label=library, left=last)\n", " last += co_occurring_library\n", - " \n", + "\n", " ax.yaxis.set_inverted(True)\n", " ax.legend(bbox_to_anchor=(1.35, 0.5), loc='center right', framealpha=0.5)\n", " ax.set(xlabel='percentage of questions with co-occurrences', xlim=(0, 1))\n", @@ -80347,11 +80347,11 @@ " for i, library in enumerate(libraries):\n", " co_occurring_library = data[library]\n", " ax.barh(\n", - " libraries, co_occurring_library, \n", + " libraries, co_occurring_library,\n", " label=library, left=last, color=cmap(i)\n", " )\n", " last += co_occurring_library\n", - " \n", + "\n", " ax.yaxis.set_inverted(True)\n", " ax.legend(bbox_to_anchor=(1.35, 0.5), loc='center right', framealpha=0.5)\n", " ax.set(xlabel='percentage of questions with co-occurrences', xlim=(0, 1))\n", @@ -82836,7 +82836,7 @@ " )\n", " last += prcp\n", "\n", - " ax.set_xlabel('2020 total precipitation (inches)') \n", + " ax.set_xlabel('2020 total precipitation (inches)')\n", " ax.set_title('Total Precipitation per City in 2020', y=1.1)\n", " ax.axvline(total_prcp['Seattle'], linestyle='--', color='gray')\n", " ax.legend(bbox_to_anchor=(0.5, 1.15), loc='upper center', ncols=4, frameon=False)\n", @@ -84824,7 +84824,7 @@ ], "source": [ "subway = pd.read_csv(\n", - " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'], \n", + " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'],\n", " index_col=['Borough', 'Datetime']\n", ")\n", "subway_daily = subway.unstack(0)\n", @@ -90733,7 +90733,7 @@ "\n", " axes[0].set_ylabel('Frequency')\n", " fig.suptitle('Histogram of Daily Subway Entries in Manhattan')\n", - " \n", + "\n", " return fig, axes" ] }, diff --git a/slides/2-animations.ipynb b/slides/2-animations.ipynb index a3a1098..f5b0fe5 100644 --- a/slides/2-animations.ipynb +++ b/slides/2-animations.ipynb @@ -85,7 +85,7 @@ "We made a bar plot that captured the total number of questions per library, but it couldn't show us the growth in pandas questions over time (or how the growth rate changed over time):\n", "\n", "
\n", - " \"bar\n", + " \"bar\n", "
" ] }, @@ -102,7 +102,7 @@ "We also made an area plot showing the number of questions per day over time for the top 4 libraries, but by limiting the libraries shown we lost some information:\n", "\n", "
\n", - " \"area\n", + " \"area\n", "
" ] }, @@ -404,7 +404,7 @@ " fig, ax = plt.subplots(figsize=(6, 4), layout='constrained')\n", " sort_order = data.loc[data.index.max()].squeeze().sort_values().index\n", " bars = ax.barh(sort_order, [0] * data.shape[1], label=sort_order)\n", - " \n", + "\n", " ax.set_xlabel('total questions', fontweight='bold')\n", " ax.set_xlim(0, 250_000)\n", " ax.xaxis.set_major_formatter(ticker.EngFormatter())\n", @@ -1834,7 +1834,7 @@ "source": [ "def update(frame, *, ax, df, annotations, time_text):\n", " data = df.loc[frame, :]\n", - " \n", + "\n", " # update bars\n", " for rect, text in zip(ax.patches, annotations):\n", " col = rect.get_label()\n", @@ -1922,7 +1922,7 @@ " fig, update_func, frames=questions_per_library.index, repeat=False\n", ")\n", "ani.save(\n", - " '../media/stackoverflow_questions.mp4', \n", + " '../media/stackoverflow_questions.mp4',\n", " writer='ffmpeg', fps=10, bitrate=100, dpi=300\n", ")\n", "plt.close()" @@ -2015,7 +2015,7 @@ "As with the previous example, the histograms of daily Manhattan subway entries in 2018 (from the first section of the workshop) don't tell the whole story of the dataset because the distributions changed drastically in 2020 and 2021:\n", "\n", "
\n", - " \"Histograms\n", + " \"Histograms\n", "
" ] }, @@ -2213,7 +2213,7 @@ ], "source": [ "subway = pd.read_csv(\n", - " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'], \n", + " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'],\n", " index_col=['Borough', 'Datetime']\n", ")\n", "subway_daily = subway.unstack(0)\n", @@ -2315,7 +2315,7 @@ " {'label': 'Weekend', 'mask': ~weekday_mask, 'ymax': 60},\n", " {'label': 'Weekday', 'mask': weekday_mask, 'ymax': 120}\n", " ]\n", - " \n", + "\n", " fig, axes = plt.subplots(1, 2, figsize=(6, 3), sharex=True, layout='constrained')\n", " for ax, config in zip(axes, configs):\n", " _, _, config['hist'] = ax.hist(\n", @@ -4461,7 +4461,7 @@ "import pandas as pd\n", "\n", "manhattan_entries = pd.read_csv(\n", - " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'], \n", + " '../data/NYC_subway_daily.csv', parse_dates=['Datetime'],\n", " index_col=['Borough', 'Datetime']\n", ").unstack(0)['Entries']['M']\n", "manhattan_entries.head()" @@ -4541,7 +4541,7 @@ "source": [ "def subway_histogram(data, bins, date_range):\n", " _, bin_ranges = np.histogram(data, bins=bins)\n", - " \n", + "\n", " weekday_mask = data.index.weekday < 5\n", " configs = [ # CHANGE: add bar color to config\n", " {'label': 'Weekend', 'mask': ~weekday_mask, 'color': 'orange'},\n", @@ -4557,7 +4557,7 @@ "\n", " ax.xaxis.set_major_formatter(ticker.EngFormatter())\n", " despine(ax)\n", - " \n", + "\n", " # CHANGES: update formatting and add legend\n", " ax.set(\n", " xlim=(0, None), ylim=(0, 120), xlabel='Entries', ylabel='Frequency',\n", diff --git a/slides/3-interactivity.ipynb b/slides/3-interactivity.ipynb index c6f6b54..414fd13 100644 --- a/slides/3-interactivity.ipynb +++ b/slides/3-interactivity.ipynb @@ -1264,7 +1264,7 @@ "

The interactivity works best in a notebook environment – here's an example of the slider, tooltips, and zoom/pan functionality in action:

\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -2268,7 +2268,7 @@ "

The result can be interacted with after displaying it, but this kind of interactivity only works in the notebook. Here's an example:

\n", "\n", "
\n", - " \"Linked\n", + " \"Linked\n", "
\n", " This example uses tabs to display the layout rather than side by side like we did. Tabs are a little buggy in this version, so we didn't use them.\n", "
" @@ -3309,7 +3309,7 @@ "metadata": {}, "source": [ "
\n", - " \n", + " \n", "
" ] }, @@ -3353,7 +3353,7 @@ }, "source": [ "
\n", - " \n", + " \n", "
" ] }, @@ -3384,12 +3384,12 @@ "flight_stats = pd.read_csv(\n", " '../data/T100_MARKET_ALL_CARRIER.zip',\n", " usecols=[\n", - " 'CLASS', 'REGION', 'UNIQUE_CARRIER_NAME', 'ORIGIN_CITY_NAME', 'ORIGIN', \n", + " 'CLASS', 'REGION', 'UNIQUE_CARRIER_NAME', 'ORIGIN_CITY_NAME', 'ORIGIN',\n", " 'DEST_CITY_NAME', 'DEST', 'PASSENGERS', 'FREIGHT', 'MAIL'\n", " ]\n", ").rename(lambda x: x.lower(), axis=1).assign(\n", " region=lambda x: x.region.replace({\n", - " 'D': 'Domestic', 'I': 'International', 'A': 'Atlantic', \n", + " 'D': 'Domestic', 'I': 'International', 'A': 'Atlantic',\n", " 'L': 'Latin America', 'P': 'Pacific', 'S': 'System'\n", " }),\n", " route=lambda x: np.where(\n", @@ -3594,7 +3594,7 @@ "source": [ "cities = [\n", " 'Atlanta, GA', 'Chicago, IL', 'New York, NY', 'Los Angeles, CA',\n", - " 'Dallas/Fort Worth, TX', 'Denver, CO', 'Houston, TX', \n", + " 'Dallas/Fort Worth, TX', 'Denver, CO', 'Houston, TX',\n", " 'San Francisco, CA', 'Seattle, WA', 'Orlando, FL'\n", "]\n", "\n", @@ -3621,7 +3621,7 @@ "A **chord diagram** is a way of showing many-to-many relationships between a set of entities called **nodes**: the nodes are arranged in a circle, and chords (which can be thought of as **edges**) are drawn between those that are connected, with the width of the chord encoding the strength of the connection. In this section, we will be making a chord diagram for total passenger service travel between the top 10 cities in 2019:\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -3891,7 +3891,7 @@ "source": [ "chord = hv.Chord(\n", " total_flight_stats,\n", - " kdims=['origin', 'dest'], \n", + " kdims=['origin', 'dest'],\n", " vdims=['passengers', 'origin_city_name', 'dest_city_name', 'mail', 'freight']\n", ")" ] @@ -4105,7 +4105,7 @@ "

The result can be interacted with after displaying it, but it works best in the notebook – the GIF below shows some example interactions. Note that for this visualization the interactivity is what makes it useful:

\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -4126,7 +4126,7 @@ "For our final visualization, we will create a **Sankey plot**, which is a way to visualize flow as edges between nodes. Here, we will use it to analyze airline market share for passenger service flights between the top 5 US cities:\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -4550,7 +4550,7 @@ ], "source": [ "carrier_edges = get_edges(\n", - " domestic_passenger_travel, \n", + " domestic_passenger_travel,\n", " source_col='region',\n", " target_col='unique_carrier_name'\n", ").replace('^Domestic$', 'Top Routes', regex=True)\n", @@ -4751,11 +4751,11 @@ "outputs": [], "source": [ "sankey = hv.Sankey(\n", - " all_edges, \n", + " all_edges,\n", " kdims=['source', 'target'],\n", " vdims=hv.Dimension('passengers', unit='M')\n", ").opts(\n", - " labels='index', label_position='right', cmap='Set1', # node config \n", + " labels='index', label_position='right', cmap='Set1', # node config\n", " edge_color='lightgray', # edge config\n", " width=750, height=600, # plot size config\n", " title='Travel Between the Top 5 Cities in 2019'\n", @@ -4896,7 +4896,7 @@ "

The resulting visualization can be interacted with after displaying it, but it works best in the notebook. Here's an example:

\n", "\n", "
\n", - " \n", + " \n", "
" ] }, @@ -5861,7 +5861,7 @@ "metadata": {}, "source": [ "
\n", - " \n", + " \n", "
" ] }, @@ -5941,7 +5941,7 @@ "*All examples herein were developed exclusively for this workshop – be sure to check out my book, [Hands-On Data Analysis with Pandas](https://www.amazon.com/Hands-Data-Analysis-Pandas-visualization-dp-1800563450/dp/1800563450/), and my [pandas workshop](https://stefaniemolin.com/workshops/pandas-workshop/) for more Python data science content.*\n", "\n", "
\n", - " \n", + " \n", "
\n", " stefaniemolin.com\n", "
\n", diff --git a/slides/4-outro.ipynb b/slides/4-outro.ipynb index 2df228b..80dfd3b 100644 --- a/slides/4-outro.ipynb +++ b/slides/4-outro.ipynb @@ -24,7 +24,7 @@ " alt=\"Please visit https://stefaniemolin.com/feedback to provide your feedback on this session.\"\n", " title=\"QR code to submit feedback (leads to https://stefaniemolin.com/feedback)\"\n", " class=\"qr-code\"\n", - " src=\"https://raw.githubusercontent.com/stefmolin/pandas-workshop/main/media/qr-code.png\"\n", + " src=\"media/qr-code.png\"\n", " />\n", " \n", "
\n", diff --git a/media/2018_subway_entries_histogram.svg b/slides/media/2018_subway_entries_histogram.svg similarity index 100% rename from media/2018_subway_entries_histogram.svg rename to slides/media/2018_subway_entries_histogram.svg diff --git a/media/area_plot.svg b/slides/media/area_plot.svg similarity index 100% rename from media/area_plot.svg rename to slides/media/area_plot.svg diff --git a/media/bar_plot.svg b/slides/media/bar_plot.svg similarity index 100% rename from media/bar_plot.svg rename to slides/media/bar_plot.svg diff --git a/media/chord.gif b/slides/media/chord.gif similarity index 100% rename from media/chord.gif rename to slides/media/chord.gif diff --git a/media/chord.png b/slides/media/chord.png similarity index 100% rename from media/chord.png rename to slides/media/chord.png diff --git a/media/figure_anatomy.svg b/slides/media/figure_anatomy.svg similarity index 100% rename from media/figure_anatomy.svg rename to slides/media/figure_anatomy.svg diff --git a/media/interactive_map.gif b/slides/media/interactive_map.gif similarity index 100% rename from media/interactive_map.gif rename to slides/media/interactive_map.gif diff --git a/media/linked_plots.gif b/slides/media/linked_plots.gif similarity index 100% rename from media/linked_plots.gif rename to slides/media/linked_plots.gif diff --git a/slides/media/pandas_drawing.png b/slides/media/pandas_drawing.png new file mode 100644 index 0000000..4be1915 Binary files /dev/null and b/slides/media/pandas_drawing.png differ diff --git a/media/qr-code.png b/slides/media/qr-code.png similarity index 100% rename from media/qr-code.png rename to slides/media/qr-code.png diff --git a/media/sankey.gif b/slides/media/sankey.gif similarity index 100% rename from media/sankey.gif rename to slides/media/sankey.gif diff --git a/media/sankey.png b/slides/media/sankey.png similarity index 100% rename from media/sankey.png rename to slides/media/sankey.png diff --git a/media/sankey_and_chord.png b/slides/media/sankey_and_chord.png similarity index 100% rename from media/sankey_and_chord.png rename to slides/media/sankey_and_chord.png diff --git a/media/solution-3-1.gif b/slides/media/solution-3-1.gif similarity index 100% rename from media/solution-3-1.gif rename to slides/media/solution-3-1.gif diff --git a/media/solution-3-2.gif b/slides/media/solution-3-2.gif similarity index 100% rename from media/solution-3-2.gif rename to slides/media/solution-3-2.gif diff --git a/slides/media/time_is_up.m4a b/slides/media/time_is_up.m4a new file mode 100644 index 0000000..5e64f75 Binary files /dev/null and b/slides/media/time_is_up.m4a differ diff --git a/slides/templates/larger_font/index.html.j2 b/slides/templates/larger_font/index.html.j2 index 1fbf51f..5d3f384 100644 --- a/slides/templates/larger_font/index.html.j2 +++ b/slides/templates/larger_font/index.html.j2 @@ -371,7 +371,7 @@ require( if (secondsRemaining >= 1) $('.exercise-timer').text(getTimeText(elapsedTime)); else if (secondsRemaining >= 0) { $('.exercise-timer').text(getTimeText(Duration.fromMillis(0))); - const audio = new Audio('https://raw.githubusercontent.com/stefmolin/pandas-workshop/main/media/time_is_up.m4a'); + const audio = new Audio('media/time_is_up.m4a'); audio.play(); } else if (secondsRemaining >= -5) $('.exercise-timer').text("TIME'S UP").css('color', 'red'); diff --git a/slides/templates/regular/index.html.j2 b/slides/templates/regular/index.html.j2 index 35e7f1c..fcb84ca 100644 --- a/slides/templates/regular/index.html.j2 +++ b/slides/templates/regular/index.html.j2 @@ -367,7 +367,7 @@ require( if (secondsRemaining >= 1) $('.exercise-timer').text(getTimeText(elapsedTime)); else if (secondsRemaining >= 0) { $('.exercise-timer').text(getTimeText(Duration.fromMillis(0))); - const audio = new Audio('https://raw.githubusercontent.com/stefmolin/pandas-workshop/main/media/time_is_up.m4a'); + const audio = new Audio('media/time_is_up.m4a'); audio.play(); } else if (secondsRemaining >= -5) $('.exercise-timer').text("TIME'S UP").css('color', 'red');