From bb2308cf0238ffab8063eb57b7718374df011c02 Mon Sep 17 00:00:00 2001 From: cvanelteren Date: Wed, 7 Jan 2026 14:45:02 +1000 Subject: [PATCH 1/7] Add ridgeline plot feature with histogram support Implements ridgeline plots (also known as joyplots) for visualizing distributions of multiple datasets as stacked, overlapping density curves. Features: - Support for both vertical (traditional) and horizontal orientations - Kernel density estimation (KDE) for smooth curves - Histogram mode for binned bar charts (hist=True) - Customizable overlap between ridges - Color specification via colormap or custom colors - Integration with UltraPlot's color cycle - Transparent error handling for invalid distributions - Follows UltraPlot's docstring snippet manager pattern Methods added: - ridgeline(): Create vertical ridgeline plots - ridgelineh(): Create horizontal ridgeline plots - _apply_ridgeline(): Internal implementation Tests added: - test_ridgeline_basic: Basic KDE functionality - test_ridgeline_colormap: Colormap support - test_ridgeline_horizontal: Horizontal orientation - test_ridgeline_custom_colors: Custom color specification - test_ridgeline_histogram: Histogram mode - test_ridgeline_histogram_colormap: Histogram with colormap - test_ridgeline_comparison_kde_vs_hist: KDE vs histogram comparison - test_ridgeline_empty_data: Error handling for empty data - test_ridgeline_label_mismatch: Error handling for label mismatch Docstrings registered with snippet manager following UltraPlot conventions. --- ultraplot/axes/plot.py | 307 +++++++++++++++++++ ultraplot/tests/test_statistical_plotting.py | 215 ++++++++++++- 2 files changed, 521 insertions(+), 1 deletion(-) diff --git a/ultraplot/axes/plot.py b/ultraplot/axes/plot.py index 526e6ffac..82ec3de15 100644 --- a/ultraplot/axes/plot.py +++ b/ultraplot/axes/plot.py @@ -1109,6 +1109,82 @@ ) +# Ridgeline plot docstrings +_ridgeline_docstring = """ +Create a {orientation} ridgeline plot (also known as a joyplot). 
+ +Ridgeline plots visualize distributions of multiple datasets as stacked, +overlapping density curves. They are useful for comparing distributions +across categories or over time. + +Parameters +---------- +data : list of array-like + List of distributions to plot. Each element should be an array-like + object containing the data points for one distribution. +labels : list of str, optional + Labels for each distribution. If not provided, generates default labels. +overlap : float, default: 0.5 + Amount of overlap between ridges, from 0 (no overlap) to 1 (full overlap). + Higher values create more dramatic visual overlapping. +bandwidth : float, optional + Bandwidth parameter for kernel density estimation. If None (default), + uses automatic bandwidth selection via Scott's rule. Only used when hist=False. +hist : bool, default: False + If True, uses histograms instead of kernel density estimation. +bins : int or sequence or str, default: 'auto' + Bin specification for histograms. Can be an integer (number of bins), + a sequence defining bin edges, or a string method ('auto', 'sturges', etc.). + Only used when hist=True. +fill : bool, default: True + Whether to fill the area under each density curve. +alpha : float, default: 0.7 + Transparency level for filled areas (0=transparent, 1=opaque). +linewidth : float, default: 1.5 + Width of the outline for each ridge. +edgecolor : color, default: 'black' + Color of the ridge outlines. +facecolor : color or list of colors, optional + Fill color(s) for the ridges. If a single color, applies to all ridges. + If a list, must match the number of distributions. If None, uses the + current color cycle or colormap. +cmap : str or Colormap, optional + Colormap name or object to use for coloring ridges. Overridden by facecolor. + +Returns +------- +list + List of artist objects for each ridge (PolyCollection or Line2D). 
+ +Examples +-------- +>>> import ultraplot as uplt +>>> import numpy as np +>>> fig, ax = uplt.subplots() +>>> data = [np.random.normal(i, 1, 1000) for i in range(5)] +>>> ax.ridgeline(data, labels=[f'Group {{i+1}}' for i in range(5)]) + +>>> # With colormap +>>> fig, ax = uplt.subplots() +>>> ax.ridgeline(data, cmap='viridis', overlap=0.7) + +>>> # With histograms instead of KDE +>>> fig, ax = uplt.subplots() +>>> ax.ridgeline(data, hist=True, bins=20) + +See Also +-------- +violinplot : Violin plots for distribution visualization +hist : Histogram for single distribution +""" +docstring._snippet_manager["plot.ridgeline"] = _ridgeline_docstring.format( + orientation="vertical" +) +docstring._snippet_manager["plot.ridgelineh"] = _ridgeline_docstring.format( + orientation="horizontal" +) + + # 1D histogram docstrings _hist_docstring = """ Plot {orientation} histograms. @@ -5262,6 +5338,237 @@ def violinploth(self, *args, **kwargs): kwargs = _parse_vert(default_vert=False, **kwargs) return self._apply_violinplot(*args, **kwargs) + def _apply_ridgeline( + self, + data, + labels=None, + overlap=0.5, + bandwidth=None, + hist=False, + bins="auto", + fill=True, + alpha=0.7, + linewidth=1.5, + edgecolor="black", + facecolor=None, + cmap=None, + vert=True, + **kwargs, + ): + """ + Apply ridgeline plot (joyplot). + + Parameters + ---------- + data : list of array-like + List of distributions to plot as ridges. + labels : list of str, optional + Labels for each distribution. + overlap : float, default: 0.5 + Amount of overlap between ridges (0-1). Higher values create more overlap. + bandwidth : float, optional + Bandwidth for kernel density estimation. If None, uses automatic selection. + Only used when hist=False. + hist : bool, default: False + If True, use histograms instead of kernel density estimation. + bins : int or sequence or str, default: 'auto' + Bin specification for histograms. Passed to numpy.histogram. + Only used when hist=True. 
+ fill : bool, default: True + Whether to fill the area under each curve. + alpha : float, default: 0.7 + Transparency of filled areas. + linewidth : float, default: 1.5 + Width of the ridge lines. + edgecolor : color, default: 'black' + Color of the ridge lines. + facecolor : color or list of colors, optional + Fill color(s). If None, uses current color cycle or colormap. + cmap : str or Colormap, optional + Colormap to use for coloring ridges. + vert : bool, default: True + If True, ridges are horizontal (traditional ridgeline plot). + If False, ridges are vertical. + **kwargs + Additional keyword arguments passed to fill_between or fill_betweenx. + + Returns + ------- + list + List of PolyCollection objects for each ridge. + """ + from scipy.stats import gaussian_kde + + # Validate input + if not isinstance(data, (list, tuple)): + data = [data] + + n_ridges = len(data) + if labels is None: + labels = [f"Ridge {i+1}" for i in range(n_ridges)] + elif len(labels) != n_ridges: + raise ValueError( + f"Number of labels ({len(labels)}) must match number of data series ({n_ridges})" + ) + + # Determine colors + if facecolor is None: + if cmap is not None: + # Use colormap + if isinstance(cmap, str): + cmap = mcm.get_cmap(cmap) + colors = [cmap(i / (n_ridges - 1)) for i in range(n_ridges)] + else: + # Use color cycle + parser = self._get_patches_for_fill + colors = [parser.get_next_color() for _ in range(n_ridges)] + elif isinstance(facecolor, (list, tuple)): + colors = list(facecolor) + else: + colors = [facecolor] * n_ridges + + # Ensure we have enough colors + if len(colors) < n_ridges: + colors = colors * (n_ridges // len(colors) + 1) + colors = colors[:n_ridges] + + # Calculate KDE or histogram for each distribution + ridges = [] + for i, dist in enumerate(data): + dist = np.asarray(dist).ravel() + dist = dist[~np.isnan(dist)] # Remove NaNs + + if len(dist) < 2: + warnings._warn_ultraplot( + f"Distribution {i} has less than 2 points, skipping" + ) + continue + + if 
hist: + # Use histogram + try: + counts, bin_edges = np.histogram(dist, bins=bins) + # Create x values as bin centers + x = (bin_edges[:-1] + bin_edges[1:]) / 2 + # Extend to bin edges for proper fill + x_extended = np.concatenate([[bin_edges[0]], x, [bin_edges[-1]]]) + y_extended = np.concatenate([[0], counts, [0]]) + ridges.append((x_extended, y_extended)) + except Exception as e: + warnings._warn_ultraplot( + f"Histogram failed for distribution {i}: {e}, skipping" + ) + continue + else: + # Perform KDE + try: + kde = gaussian_kde(dist, bw_method=bandwidth) + # Create smooth x values + x_min, x_max = dist.min(), dist.max() + x_range = x_max - x_min + x_margin = x_range * 0.1 # 10% margin + x = np.linspace(x_min - x_margin, x_max + x_margin, 200) + y = kde(x) + ridges.append((x, y)) + except Exception as e: + warnings._warn_ultraplot( + f"KDE failed for distribution {i}: {e}, skipping" + ) + continue + + if not ridges: + raise ValueError("No valid distributions to plot") + + # Normalize heights and add vertical offsets + max_height = max(y.max() for x, y in ridges) + spacing = max_height * (1 + overlap) + + artists = [] + for i, (x, y) in enumerate(ridges): + # Normalize and offset + y_normalized = y / max_height + offset = i * spacing + y_plot = y_normalized + offset + + if vert: + # Traditional horizontal ridges + if fill: + poly = self.fill_between( + x, + offset, + y_plot, + facecolor=colors[i], + alpha=alpha, + edgecolor=edgecolor, + linewidth=linewidth, + label=labels[i], + **kwargs, + ) + else: + poly = self.plot( + x, + y_plot, + color=colors[i], + linewidth=linewidth, + label=labels[i], + **kwargs, + )[0] + else: + # Vertical ridges + if fill: + poly = self.fill_betweenx( + x, + offset, + y_plot, + facecolor=colors[i], + alpha=alpha, + edgecolor=edgecolor, + linewidth=linewidth, + label=labels[i], + **kwargs, + ) + else: + poly = self.plot( + y_plot, + x, + color=colors[i], + linewidth=linewidth, + label=labels[i], + **kwargs, + )[0] + + 
artists.append(poly) + + # Set appropriate labels and limits + if vert: + self.set_yticks(np.arange(n_ridges) * spacing) + self.set_yticklabels(labels[: len(ridges)]) + self.set_ylabel("") + else: + self.set_xticks(np.arange(n_ridges) * spacing) + self.set_xticklabels(labels[: len(ridges)]) + self.set_xlabel("") + + return artists + + @inputs._preprocess_or_redirect("data") + @docstring._snippet_manager + def ridgeline(self, data, **kwargs): + """ + %(plot.ridgeline)s + """ + kwargs = _parse_vert(default_vert=True, **kwargs) + return self._apply_ridgeline(data, **kwargs) + + @inputs._preprocess_or_redirect("data") + @docstring._snippet_manager + def ridgelineh(self, data, **kwargs): + """ + %(plot.ridgelineh)s + """ + kwargs = _parse_vert(default_vert=False, **kwargs) + return self._apply_ridgeline(data, **kwargs) + def _apply_hist( self, xs, diff --git a/ultraplot/tests/test_statistical_plotting.py b/ultraplot/tests/test_statistical_plotting.py index d1aff89c3..7e5f3183c 100644 --- a/ultraplot/tests/test_statistical_plotting.py +++ b/ultraplot/tests/test_statistical_plotting.py @@ -1,8 +1,11 @@ #!/usr/bin/env python3 # import ultraplot as uplt -import numpy as np, pandas as pd, ultraplot as uplt +import numpy as np +import pandas as pd import pytest +import ultraplot as uplt + @pytest.mark.mpl_image_compare def test_statistical_boxplot(rng): @@ -93,3 +96,213 @@ def test_input_violin_box_options(): axes[3].bar(data, median=True, boxstds=True, bars=False) axes[3].format(title="boxstds") return fig + + +@pytest.mark.mpl_image_compare +def test_ridgeline_basic(rng): + """ + Test basic ridgeline plot functionality. 
+ """ + # Generate test data with different means + data = [rng.normal(i, 1, 500) for i in range(5)] + labels = [f"Group {i+1}" for i in range(5)] + + fig, ax = uplt.subplots(figsize=(8, 6)) + ax.ridgeline(data, labels=labels, overlap=0.5, alpha=0.7) + ax.format( + title="Basic Ridgeline Plot", + xlabel="Value", + grid=False, + ) + return fig + + +@pytest.mark.mpl_image_compare +def test_ridgeline_colormap(rng): + """ + Test ridgeline plot with colormap. + """ + # Generate test data + data = [rng.normal(i * 0.5, 1, 300) for i in range(6)] + labels = [f"Distribution {i+1}" for i in range(6)] + + fig, ax = uplt.subplots(figsize=(8, 6)) + ax.ridgeline( + data, + labels=labels, + overlap=0.7, + cmap="viridis", + alpha=0.8, + linewidth=2, + ) + ax.format( + title="Ridgeline Plot with Colormap", + xlabel="Value", + grid=False, + ) + return fig + + +@pytest.mark.mpl_image_compare +def test_ridgeline_horizontal(rng): + """ + Test horizontal ridgeline plot (vertical orientation). + """ + # Generate test data + data = [rng.normal(i, 0.8, 400) for i in range(4)] + labels = ["Alpha", "Beta", "Gamma", "Delta"] + + fig, ax = uplt.subplots(figsize=(6, 8)) + ax.ridgelineh( + data, + labels=labels, + overlap=0.6, + facecolor="skyblue", + alpha=0.6, + ) + ax.format( + title="Horizontal Ridgeline Plot", + ylabel="Value", + grid=False, + ) + return fig + + +@pytest.mark.mpl_image_compare +def test_ridgeline_custom_colors(rng): + """ + Test ridgeline plot with custom colors. 
+ """ + # Generate test data + data = [rng.normal(i * 2, 1.5, 350) for i in range(4)] + labels = ["Red", "Green", "Blue", "Yellow"] + colors = ["red", "green", "blue", "yellow"] + + fig, ax = uplt.subplots(figsize=(8, 6)) + ax.ridgeline( + data, + labels=labels, + overlap=0.5, + facecolor=colors, + alpha=0.7, + edgecolor="black", + linewidth=1.5, + ) + ax.format( + title="Ridgeline Plot with Custom Colors", + xlabel="Value", + grid=False, + ) + return fig + + +def test_ridgeline_empty_data(): + """ + Test that ridgeline plot raises error with empty data. + """ + fig, ax = uplt.subplots() + with pytest.raises(ValueError, match="No valid distributions to plot"): + ax.ridgeline([[], []]) + + +def test_ridgeline_label_mismatch(): + """ + Test that ridgeline plot raises error when labels don't match data length. + """ + data = [np.random.normal(0, 1, 100) for _ in range(3)] + labels = ["A", "B"] # Only 2 labels for 3 distributions + + fig, ax = uplt.subplots() + with pytest.raises(ValueError, match="Number of labels.*must match"): + ax.ridgeline(data, labels=labels) + + +@pytest.mark.mpl_image_compare +def test_ridgeline_histogram(rng): + """ + Test ridgeline plot with histograms instead of KDE. + """ + # Generate test data with different means + data = [rng.normal(i * 1.5, 1, 500) for i in range(5)] + labels = [f"Group {i+1}" for i in range(5)] + + fig, ax = uplt.subplots(figsize=(8, 6)) + ax.ridgeline( + data, + labels=labels, + overlap=0.5, + alpha=0.7, + hist=True, + bins=20, + ) + ax.format( + title="Ridgeline Plot with Histograms", + xlabel="Value", + grid=False, + ) + return fig + + +@pytest.mark.mpl_image_compare +def test_ridgeline_histogram_colormap(rng): + """ + Test ridgeline histogram plot with colormap. 
+ """ + # Generate test data + data = [rng.normal(i * 0.8, 1.2, 400) for i in range(6)] + labels = [f"Dist {i+1}" for i in range(6)] + + fig, ax = uplt.subplots(figsize=(8, 6)) + ax.ridgeline( + data, + labels=labels, + overlap=0.6, + cmap="plasma", + alpha=0.75, + hist=True, + bins=25, + linewidth=1.5, + ) + ax.format( + title="Histogram Ridgeline with Plasma Colormap", + xlabel="Value", + grid=False, + ) + return fig + + +@pytest.mark.mpl_image_compare +def test_ridgeline_comparison_kde_vs_hist(rng): + """ + Test comparison of KDE vs histogram ridgeline plots. + """ + # Generate test data + data = [rng.normal(i, 0.8, 300) for i in range(4)] + labels = ["A", "B", "C", "D"] + + fig, axs = uplt.subplots(ncols=2, figsize=(12, 5)) + + # KDE version + axs[0].ridgeline( + data, + labels=labels, + overlap=0.5, + cmap="viridis", + alpha=0.7, + ) + axs[0].format(title="KDE Ridgeline", xlabel="Value", grid=False) + + # Histogram version + axs[1].ridgeline( + data, + labels=labels, + overlap=0.5, + cmap="viridis", + alpha=0.7, + hist=True, + bins=15, + ) + axs[1].format(title="Histogram Ridgeline", xlabel="Value", grid=False) + + fig.format(suptitle="KDE vs Histogram Ridgeline Comparison") + return fig From edd88de37ecf46dd13d4ce6a4ffc226144abb3fc Mon Sep 17 00:00:00 2001 From: cvanelteren Date: Wed, 7 Jan 2026 15:00:22 +1000 Subject: [PATCH 2/7] Fix ridgeline plot outline to exclude baseline The ridge outlines now only trace the top curve of each distribution, not the baseline. This is achieved by: - Using fill_between/fill_betweenx with edgecolor='none' - Drawing a separate plot() line on top for the outline - Proper z-ordering to ensure outline appears above fill This creates cleaner ridgeline plots where the baseline doesn't have a visible edge line connecting the endpoints. 
--- ultraplot/axes/plot.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/ultraplot/axes/plot.py b/ultraplot/axes/plot.py index 82ec3de15..1a2f40ad4 100644 --- a/ultraplot/axes/plot.py +++ b/ultraplot/axes/plot.py @@ -5493,17 +5493,25 @@ def _apply_ridgeline( if vert: # Traditional horizontal ridges if fill: + # Fill without edge poly = self.fill_between( x, offset, y_plot, facecolor=colors[i], alpha=alpha, - edgecolor=edgecolor, - linewidth=linewidth, + edgecolor="none", label=labels[i], **kwargs, ) + # Draw outline on top (excluding baseline) + self.plot( + x, + y_plot, + color=edgecolor, + linewidth=linewidth, + zorder=poly.get_zorder() + 0.1, + ) else: poly = self.plot( x, @@ -5516,17 +5524,25 @@ def _apply_ridgeline( else: # Vertical ridges if fill: + # Fill without edge poly = self.fill_betweenx( x, offset, y_plot, facecolor=colors[i], alpha=alpha, - edgecolor=edgecolor, - linewidth=linewidth, + edgecolor="none", label=labels[i], **kwargs, ) + # Draw outline on top (excluding baseline) + self.plot( + y_plot, + x, + color=edgecolor, + linewidth=linewidth, + zorder=poly.get_zorder() + 0.1, + ) else: poly = self.plot( y_plot, From ef200ca4a56af63cadcb627cfde82721e9031c40 Mon Sep 17 00:00:00 2001 From: cvanelteren Date: Wed, 7 Jan 2026 15:02:18 +1000 Subject: [PATCH 3/7] Improve z-ordering for ridgeline plots Implements explicit z-ordering to ensure proper layering: - Each ridge i gets: fill at base+i*2, outline at base+i*2+1 - Later ridges appear on top of earlier ridges - Outline always appears on top of its corresponding fill - Base zorder defaults to 2 (above grid/axes elements) - User can override base zorder via zorder parameter This ensures clean visual layering even with high overlap values and when other plot elements are present (e.g., grids). 
--- ultraplot/axes/plot.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ultraplot/axes/plot.py b/ultraplot/axes/plot.py index 1a2f40ad4..bcf6e914a 100644 --- a/ultraplot/axes/plot.py +++ b/ultraplot/axes/plot.py @@ -5484,12 +5484,20 @@ def _apply_ridgeline( spacing = max_height * (1 + overlap) artists = [] + # Base zorder for ridgelines - use a high value to ensure they're on top + base_zorder = kwargs.pop("zorder", 2) + for i, (x, y) in enumerate(ridges): # Normalize and offset y_normalized = y / max_height offset = i * spacing y_plot = y_normalized + offset + # Each ridge gets its own zorder, with fill and outline properly layered + # Ridge i: fill at base + i*2, outline at base + i*2 + 1 + fill_zorder = base_zorder + i * 2 + outline_zorder = fill_zorder + 1 + if vert: # Traditional horizontal ridges if fill: @@ -5502,7 +5510,7 @@ def _apply_ridgeline( alpha=alpha, edgecolor="none", label=labels[i], - **kwargs, + zorder=fill_zorder, ) # Draw outline on top (excluding baseline) self.plot( @@ -5510,7 +5518,7 @@ def _apply_ridgeline( y_plot, color=edgecolor, linewidth=linewidth, - zorder=poly.get_zorder() + 0.1, + zorder=outline_zorder, ) else: poly = self.plot( @@ -5519,7 +5527,7 @@ def _apply_ridgeline( color=colors[i], linewidth=linewidth, label=labels[i], - **kwargs, + zorder=outline_zorder, )[0] else: # Vertical ridges @@ -5533,7 +5541,7 @@ def _apply_ridgeline( alpha=alpha, edgecolor="none", label=labels[i], - **kwargs, + zorder=fill_zorder, ) # Draw outline on top (excluding baseline) self.plot( @@ -5541,7 +5549,7 @@ def _apply_ridgeline( x, color=edgecolor, linewidth=linewidth, - zorder=poly.get_zorder() + 0.1, + zorder=outline_zorder, ) else: poly = self.plot( @@ -5550,7 +5558,7 @@ def _apply_ridgeline( color=colors[i], linewidth=linewidth, label=labels[i], - **kwargs, + zorder=outline_zorder, )[0] artists.append(poly) From 74533c6590222bf5d1f323999fea9466851e6fba Mon Sep 17 00:00:00 2001 From: cvanelteren Date: 
Wed, 7 Jan 2026 15:05:53 +1000 Subject: [PATCH 4/7] Fix z-ordering: lower ridges now correctly appear in front Reversed the z-order assignment so that visually lower ridges (smaller index, closer to viewer) have higher z-order values. Z-order formula: fill_zorder = base + (n_ridges - i - 1) * 2 This ensures proper visual layering where: - Ridge 0 (bottom, front) has highest z-order - Ridge n-1 (top, back) has lowest z-order This prevents ridges from incorrectly popping in front of others when overlap is high, maintaining the correct visual depth. --- ultraplot/axes/plot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ultraplot/axes/plot.py b/ultraplot/axes/plot.py index bcf6e914a..369d40461 100644 --- a/ultraplot/axes/plot.py +++ b/ultraplot/axes/plot.py @@ -5486,6 +5486,7 @@ def _apply_ridgeline( artists = [] # Base zorder for ridgelines - use a high value to ensure they're on top base_zorder = kwargs.pop("zorder", 2) + n_ridges = len(ridges) for i, (x, y) in enumerate(ridges): # Normalize and offset @@ -5494,8 +5495,9 @@ def _apply_ridgeline( y_plot = y_normalized + offset # Each ridge gets its own zorder, with fill and outline properly layered - # Ridge i: fill at base + i*2, outline at base + i*2 + 1 - fill_zorder = base_zorder + i * 2 + # Lower ridges (smaller i, visually in front) get higher z-order + # Ridge i: fill at base + (n-i-1)*2, outline at base + (n-i-1)*2 + 1 + fill_zorder = base_zorder + (n_ridges - i - 1) * 2 outline_zorder = fill_zorder + 1 if vert: From 2640cbca69a516702f0ca17c25a5bf87129a7996 Mon Sep 17 00:00:00 2001 From: cvanelteren Date: Wed, 7 Jan 2026 15:24:59 +1000 Subject: [PATCH 5/7] Add kde_kw parameter for flexible KDE control Replaced the explicit bandwidth parameter with a more flexible kde_kw dictionary that passes all kwargs to scipy.stats.gaussian_kde. Features: - kde_kw: dict parameter for passing any KDE arguments (bw_method, weights, etc.) 
- points: int parameter to control number of evaluation points (default 200) - More maintainable and extensible than exposing individual parameters - Follows UltraPlot's convention of using *_kw parameters Example usage: - Custom bandwidth: kde_kw={'bw_method': 0.5} - With weights: kde_kw={'weights': weight_array} - Silverman method: kde_kw={'bw_method': 'silverman'} - Smoother curves: points=500 Tests added: - test_ridgeline_kde_kw: Tests various kde_kw configurations - test_ridgeline_points: Tests points parameter --- ultraplot/axes/plot.py | 35 ++++++++--- ultraplot/tests/test_statistical_plotting.py | 62 ++++++++++++++++++++ 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/ultraplot/axes/plot.py b/ultraplot/axes/plot.py index 369d40461..3dea91e27 100644 --- a/ultraplot/axes/plot.py +++ b/ultraplot/axes/plot.py @@ -1127,9 +1127,16 @@ overlap : float, default: 0.5 Amount of overlap between ridges, from 0 (no overlap) to 1 (full overlap). Higher values create more dramatic visual overlapping. -bandwidth : float, optional - Bandwidth parameter for kernel density estimation. If None (default), - uses automatic bandwidth selection via Scott's rule. Only used when hist=False. +kde_kw : dict, optional + Keyword arguments passed to `scipy.stats.gaussian_kde`. Common parameters include: + + * ``bw_method`` : Bandwidth selection method (scalar, 'scott', 'silverman', or callable) + * ``weights`` : Array of weights for each data point + + Only used when hist=False. +points : int, default: 200 + Number of evaluation points for KDE curves. Higher values create smoother + curves but take longer to compute. Only used when hist=False. hist : bool, default: False If True, uses histograms instead of kernel density estimation. 
bins : int or sequence or str, default: 'auto' @@ -5343,7 +5350,8 @@ def _apply_ridgeline( data, labels=None, overlap=0.5, - bandwidth=None, + kde_kw=None, + points=200, hist=False, bins="auto", fill=True, @@ -5366,9 +5374,16 @@ def _apply_ridgeline( Labels for each distribution. overlap : float, default: 0.5 Amount of overlap between ridges (0-1). Higher values create more overlap. - bandwidth : float, optional - Bandwidth for kernel density estimation. If None, uses automatic selection. + kde_kw : dict, optional + Keyword arguments passed to `scipy.stats.gaussian_kde`. Common parameters: + + * ``bw_method`` : Bandwidth selection method + * ``weights`` : Array of weights for each data point + Only used when hist=False. + points : int, default: 200 + Number of points to evaluate the KDE at. Higher values create smoother curves + but take longer to compute. Only used when hist=False. hist : bool, default: False If True, use histograms instead of kernel density estimation. bins : int or sequence or str, default: 'auto' @@ -5432,6 +5447,10 @@ def _apply_ridgeline( colors = colors * (n_ridges // len(colors) + 1) colors = colors[:n_ridges] + # Prepare KDE kwargs + if kde_kw is None: + kde_kw = {} + # Calculate KDE or histogram for each distribution ridges = [] for i, dist in enumerate(data): @@ -5462,12 +5481,12 @@ def _apply_ridgeline( else: # Perform KDE try: - kde = gaussian_kde(dist, bw_method=bandwidth) + kde = gaussian_kde(dist, **kde_kw) # Create smooth x values x_min, x_max = dist.min(), dist.max() x_range = x_max - x_min x_margin = x_range * 0.1 # 10% margin - x = np.linspace(x_min - x_margin, x_max + x_margin, 200) + x = np.linspace(x_min - x_margin, x_max + x_margin, points) y = kde(x) ridges.append((x, y)) except Exception as e: diff --git a/ultraplot/tests/test_statistical_plotting.py b/ultraplot/tests/test_statistical_plotting.py index 7e5f3183c..f68d30a7c 100644 --- a/ultraplot/tests/test_statistical_plotting.py +++ 
b/ultraplot/tests/test_statistical_plotting.py @@ -306,3 +306,65 @@ def test_ridgeline_comparison_kde_vs_hist(rng): fig.format(suptitle="KDE vs Histogram Ridgeline Comparison") return fig + + +def test_ridgeline_kde_kw(rng): + """ + Test that kde_kw parameter passes arguments to gaussian_kde correctly. + """ + data = [rng.normal(i, 1, 300) for i in range(3)] + labels = ["A", "B", "C"] + + # Test with custom bandwidth + fig, ax = uplt.subplots() + artists = ax.ridgeline( + data, + labels=labels, + overlap=0.5, + kde_kw={"bw_method": 0.5}, + ) + assert len(artists) == 3 + uplt.close(fig) + + # Test with weights + fig, ax = uplt.subplots() + weights = np.ones(300) * 2 # Uniform weights + artists = ax.ridgeline( + data, + labels=labels, + overlap=0.5, + kde_kw={"weights": weights}, + ) + assert len(artists) == 3 + uplt.close(fig) + + # Test with silverman bandwidth + fig, ax = uplt.subplots() + artists = ax.ridgeline( + data, + labels=labels, + overlap=0.5, + kde_kw={"bw_method": "silverman"}, + ) + assert len(artists) == 3 + uplt.close(fig) + + +def test_ridgeline_points(rng): + """ + Test that points parameter controls KDE evaluation points. + """ + data = [rng.normal(i, 1, 300) for i in range(3)] + labels = ["A", "B", "C"] + + # Test with different point counts + for points in [50, 200, 500]: + fig, ax = uplt.subplots() + artists = ax.ridgeline( + data, + labels=labels, + overlap=0.5, + points=points, + ) + assert len(artists) == 3 + uplt.close(fig) From baf636cd03fce5b70af230cdc59629b70767e43a Mon Sep 17 00:00:00 2001 From: cvanelteren Date: Wed, 7 Jan 2026 19:15:54 +1000 Subject: [PATCH 6/7] Add continuous coordinate-based positioning for scientific ridgeline plots Implements two distinct positioning modes for ridgeline plots: 1. Categorical Positioning (default): Evenly-spaced ridges with discrete labels - Uses overlap parameter to control spacing - Traditional 'joyplot' aesthetic 2. 
Continuous Positioning: Ridges anchored to specific Y-coordinates - Enabled by providing 'positions' parameter - 'height' parameter controls ridge height in Y-axis units - Essential for scientific plots where Y-axis represents physical variables - Supports: time series, depth profiles, redshift distributions, etc. Parameters: - positions: Array of Y-coordinates for each ridge - height: Ridge height in Y-axis units (auto-determined if not provided) Scientific use cases: - Ocean temperature profiles vs depth - Galaxy distributions vs redshift - Climate data over time - Atmospheric profiles vs altitude - Any data where the vertical axis has physical meaning Tests added: - test_ridgeline_continuous_positioning: Visual test of continuous mode - test_ridgeline_continuous_vs_categorical: Side-by-side comparison - test_ridgeline_continuous_errors: Error handling validation - test_ridgeline_continuous_auto_height: Auto height calculation --- ultraplot/axes/plot.py | 110 ++++++++++++++++--- ultraplot/tests/test_statistical_plotting.py | 94 ++++++++++++++++ 2 files changed, 189 insertions(+), 15 deletions(-) diff --git a/ultraplot/axes/plot.py b/ultraplot/axes/plot.py index 3dea91e27..515e83c08 100644 --- a/ultraplot/axes/plot.py +++ b/ultraplot/axes/plot.py @@ -1124,9 +1124,20 @@ object containing the data points for one distribution. labels : list of str, optional Labels for each distribution. If not provided, generates default labels. +positions : array-like, optional + Y-coordinates for positioning each ridge. If provided, enables continuous + (coordinate-based) positioning mode where ridges are anchored to specific + numerical coordinates along the Y-axis. If None (default), uses categorical + positioning with evenly-spaced ridges. +height : float or array-like, optional + Height of each ridge in Y-axis units. Only used in continuous positioning mode + (when positions is provided). Can be a single value applied to all ridges or + an array of values (one per ridge). 
If None, defaults to the minimum spacing + between positions divided by 2. overlap : float, default: 0.5 Amount of overlap between ridges, from 0 (no overlap) to 1 (full overlap). - Higher values create more dramatic visual overlapping. + Higher values create more dramatic visual overlapping. Only used in categorical + positioning mode (when positions is None). kde_kw : dict, optional Keyword arguments passed to `scipy.stats.gaussian_kde`. Common parameters include: @@ -1179,6 +1190,12 @@ >>> fig, ax = uplt.subplots() >>> ax.ridgeline(data, hist=True, bins=20) +>>> # Continuous positioning (e.g., at specific depths) +>>> fig, ax = uplt.subplots() +>>> depths = [0, 10, 25, 50, 100] # meters +>>> ax.ridgeline(data, positions=depths, height=8, labels=['Surface', '10m', '25m', '50m', '100m']) +>>> ax.format(ylabel='Depth (m)', xlabel='Temperature (°C)') + See Also -------- violinplot : Violin plots for distribution visualization @@ -5349,6 +5366,8 @@ def _apply_ridgeline( self, data, labels=None, + positions=None, + height=None, overlap=0.5, kde_kw=None, points=200, @@ -5372,8 +5391,14 @@ def _apply_ridgeline( List of distributions to plot as ridges. labels : list of str, optional Labels for each distribution. + positions : array-like, optional + Y-coordinates for continuous positioning mode. If provided, ridges are + anchored to these coordinates along the Y-axis. + height : float or array-like, optional + Height of each ridge in Y-axis units (continuous mode only). overlap : float, default: 0.5 Amount of overlap between ridges (0-1). Higher values create more overlap. + Only used in categorical mode. kde_kw : dict, optional Keyword arguments passed to `scipy.stats.gaussian_kde`. 
Common parameters: @@ -5498,9 +5523,41 @@ def _apply_ridgeline( if not ridges: raise ValueError("No valid distributions to plot") - # Normalize heights and add vertical offsets - max_height = max(y.max() for x, y in ridges) - spacing = max_height * (1 + overlap) + # Determine positioning mode + continuous_mode = positions is not None + n_ridges = len(ridges) + + if continuous_mode: + # Continuous (coordinate-based) positioning mode + positions = np.asarray(positions) + if len(positions) != len(data): + raise ValueError( + f"Number of positions ({len(positions)}) must match " + f"number of data series ({len(data)})" + ) + + # Handle height parameter + if height is None: + # Auto-determine height from position spacing + if len(positions) > 1: + min_spacing = np.min(np.diff(np.sort(positions))) + height = min_spacing / 2 + else: + height = 1.0 + + if np.isscalar(height): + heights = np.full(n_ridges, height) + else: + heights = np.asarray(height) + if len(heights) != n_ridges: + raise ValueError( + f"Number of heights ({len(heights)}) must match " + f"number of ridges ({n_ridges})" + ) + else: + # Categorical (evenly-spaced) positioning mode + max_height = max(y.max() for x, y in ridges) + spacing = max_height * (1 + overlap) artists = [] # Base zorder for ridgelines - use a high value to ensure they're on top @@ -5508,10 +5565,20 @@ def _apply_ridgeline( n_ridges = len(ridges) for i, (x, y) in enumerate(ridges): - # Normalize and offset - y_normalized = y / max_height - offset = i * spacing - y_plot = y_normalized + offset + if continuous_mode: + # Continuous mode: scale to specified height and position at coordinate + y_max = y.max() + if y_max > 0: + y_scaled = (y / y_max) * heights[i] + else: + y_scaled = y + offset = positions[i] + y_plot = y_scaled + offset + else: + # Categorical mode: normalize and space evenly + y_normalized = y / max_height + offset = i * spacing + y_plot = y_normalized + offset # Each ridge gets its own zorder, with fill and outline 
properly layered # Lower ridges (smaller i, visually in front) get higher z-order @@ -5585,14 +5652,27 @@ def _apply_ridgeline( artists.append(poly) # Set appropriate labels and limits - if vert: - self.set_yticks(np.arange(n_ridges) * spacing) - self.set_yticklabels(labels[: len(ridges)]) - self.set_ylabel("") + if continuous_mode: + # In continuous mode, positions are actual coordinates + if vert: + # Optionally set ticks at positions + if labels and all(labels[: len(ridges)]): + self.set_yticks(positions[: len(ridges)]) + self.set_yticklabels(labels[: len(ridges)]) + else: + if labels and all(labels[: len(ridges)]): + self.set_xticks(positions[: len(ridges)]) + self.set_xticklabels(labels[: len(ridges)]) else: - self.set_xticks(np.arange(n_ridges) * spacing) - self.set_xticklabels(labels[: len(ridges)]) - self.set_xlabel("") + # Categorical mode: set ticks at evenly-spaced positions + if vert: + self.set_yticks(np.arange(n_ridges) * spacing) + self.set_yticklabels(labels[: len(ridges)]) + self.set_ylabel("") + else: + self.set_xticks(np.arange(n_ridges) * spacing) + self.set_xticklabels(labels[: len(ridges)]) + self.set_xlabel("") return artists diff --git a/ultraplot/tests/test_statistical_plotting.py b/ultraplot/tests/test_statistical_plotting.py index f68d30a7c..cb73757c3 100644 --- a/ultraplot/tests/test_statistical_plotting.py +++ b/ultraplot/tests/test_statistical_plotting.py @@ -368,3 +368,97 @@ def test_ridgeline_points(rng): ) assert len(artists) == 3 uplt.close(fig) + + +@pytest.mark.mpl_image_compare +def test_ridgeline_continuous_positioning(rng): + """ + Test continuous (coordinate-based) positioning mode. 
+ """ + # Simulate temperature data at different depths + depths = [0, 10, 25, 50, 100] + mean_temps = [25, 22, 18, 12, 8] + data = [rng.normal(temp, 2, 400) for temp in mean_temps] + labels = ["Surface", "10m", "25m", "50m", "100m"] + + fig, ax = uplt.subplots(figsize=(8, 7)) + ax.ridgeline( + data, + labels=labels, + positions=depths, + height=8, + cmap="coolwarm", + alpha=0.75, + ) + ax.format( + title="Ocean Temperature by Depth (Continuous)", + xlabel="Temperature (°C)", + ylabel="Depth (m)", + grid=True, + ) + return fig + + +@pytest.mark.mpl_image_compare +def test_ridgeline_continuous_vs_categorical(rng): + """ + Test comparison of continuous vs categorical positioning. + """ + data = [rng.normal(i * 2, 1.5, 300) for i in range(4)] + labels = ["A", "B", "C", "D"] + + fig, axs = uplt.subplots(ncols=2, figsize=(12, 5)) + + # Categorical mode + axs[0].ridgeline(data, labels=labels, overlap=0.6, cmap="viridis", alpha=0.7) + axs[0].format(title="Categorical Positioning", xlabel="Value", grid=False) + + # Continuous mode + positions = [0, 5, 15, 30] + axs[1].ridgeline( + data, labels=labels, positions=positions, height=4, cmap="viridis", alpha=0.7 + ) + axs[1].format( + title="Continuous Positioning", xlabel="Value", ylabel="Coordinate", grid=True + ) + + return fig + + +def test_ridgeline_continuous_errors(rng): + """ + Test error handling in continuous positioning mode. + """ + data = [rng.normal(i, 1, 300) for i in range(3)] + + # Test position length mismatch + fig, ax = uplt.subplots() + with pytest.raises(ValueError, match="Number of positions.*must match"): + ax.ridgeline(data, positions=[0, 10]) + uplt.close(fig) + + # Test height length mismatch + fig, ax = uplt.subplots() + with pytest.raises(ValueError, match="Number of heights.*must match"): + ax.ridgeline(data, positions=[0, 10, 20], height=[5, 10]) + uplt.close(fig) + + +def test_ridgeline_continuous_auto_height(rng): + """ + Test automatic height determination in continuous mode. 
+ """ + data = [rng.normal(i, 1, 300) for i in range(3)] + positions = [0, 10, 25] + + # Test auto height (should work without error) + fig, ax = uplt.subplots() + artists = ax.ridgeline(data, positions=positions) + assert len(artists) == 3 + uplt.close(fig) + + # Test with single position + fig, ax = uplt.subplots() + artists = ax.ridgeline([data[0]], positions=[0]) + assert len(artists) == 1 + uplt.close(fig) From 5e4c8001d923ec3e923c04577febc3369662be8d Mon Sep 17 00:00:00 2001 From: cvanelteren Date: Wed, 7 Jan 2026 19:26:50 +1000 Subject: [PATCH 7/7] Add user guide documentation for ridgeline plots and fix deprecated API - Add comprehensive ridgeline plot examples to docs/stats.py - Include examples for KDE vs histogram modes - Demonstrate categorical vs continuous positioning for scientific use cases - Replace deprecated mcm.get_cmap() with constructor.Colormap() - All 15 ridgeline tests still passing --- docs/stats.py | 180 ++++++++++++++++++++++++++++++++++++++++- ultraplot/axes/plot.py | 3 +- 2 files changed, 177 insertions(+), 6 deletions(-) diff --git a/docs/stats.py b/docs/stats.py index 6303aac52..fb0e7e68b 100644 --- a/docs/stats.py +++ b/docs/stats.py @@ -79,9 +79,10 @@ shadedata = np.percentile(data, (25, 75), axis=0) # dark shading # %% -import ultraplot as uplt import numpy as np +import ultraplot as uplt + # Loop through "vertical" and "horizontal" versions varray = [[1], [2], [3]] harray = [[1, 1], [2, 3], [2, 3]] @@ -164,10 +165,11 @@ # with the same keywords used for :ref:`on-the-fly error bars `. # %% -import ultraplot as uplt import numpy as np import pandas as pd +import ultraplot as uplt + # Sample data N = 500 state = np.random.RandomState(51423) @@ -221,9 +223,10 @@ # will use the same algorithm for kernel density estimation as the `kde` commands. 
# %% -import ultraplot as uplt import numpy as np +import ultraplot as uplt + # Sample data M, N = 300, 3 state = np.random.RandomState(51423) @@ -244,9 +247,10 @@ ) # %% -import ultraplot as uplt import numpy as np +import ultraplot as uplt + # Sample data N = 500 state = np.random.RandomState(51423) @@ -284,3 +288,171 @@ px = ax.panel("t", space=0) px.hist(x, bins, color=color, fill=True, ec="k") px.format(grid=False, ylocator=[], title=title, titleloc="l") + + +# %% [raw] raw_mimetype="text/restructuredtext" +# .. _ug_ridgeline: +# +# Ridgeline plots +# --------------- +# +# Ridgeline plots (also known as joyplots) visualize distributions of multiple +# datasets as stacked, overlapping density curves. They are useful for comparing +# distributions across categories or over time. UltraPlot provides +# :func:`~ultraplot.axes.PlotAxes.ridgeline` and :func:`~ultraplot.axes.PlotAxes.ridgelineh` +# for creating vertical and horizontal ridgeline plots. +# +# Ridgeline plots support two display modes: smooth kernel density estimation (KDE) +# by default, or histograms with the `hist` keyword. They also support two positioning +# modes: categorical positioning with evenly-spaced ridges (traditional joyplots), +# or continuous positioning where ridges are anchored to specific physical coordinates +# (useful for scientific plots like depth profiles or time series). 
+ +# %% +import numpy as np + +import ultraplot as uplt + +# Sample data with different distributions +state = np.random.RandomState(51423) +data = [state.normal(i, 1, 500) for i in range(5)] +labels = [f"Distribution {i+1}" for i in range(5)] + +# Create figure with two subplots +fig, axs = uplt.subplots(ncols=2, figsize=(10, 5)) +axs.format( + abc="A.", abcloc="ul", grid=False, suptitle="Ridgeline plots: KDE vs Histogram" +) + +# KDE ridgeline (default) +axs[0].ridgeline( + data, labels=labels, overlap=0.6, cmap="viridis", alpha=0.7, linewidth=1.5 +) +axs[0].format(title="Kernel Density Estimation", xlabel="Value") + +# Histogram ridgeline +axs[1].ridgeline( + data, + labels=labels, + overlap=0.6, + cmap="plasma", + alpha=0.7, + hist=True, + bins=20, + linewidth=1.5, +) +axs[1].format(title="Histogram", xlabel="Value") + +# %% +import numpy as np + +import ultraplot as uplt + +# Sample data +state = np.random.RandomState(51423) +data1 = [state.normal(i * 0.5, 1, 400) for i in range(6)] +data2 = [state.normal(i, 0.8, 400) for i in range(4)] +labels1 = [f"Group {i+1}" for i in range(6)] +labels2 = ["Alpha", "Beta", "Gamma", "Delta"] + +# Create figure with vertical and horizontal orientations +fig, axs = uplt.subplots(ncols=2, figsize=(10, 5)) +axs.format(abc="A.", abcloc="ul", grid=False, suptitle="Ridgeline plot orientations") + +# Vertical ridgeline (default - ridges are horizontal) +axs[0].ridgeline( + data1, labels=labels1, overlap=0.7, cmap="coolwarm", alpha=0.8, linewidth=2 +) +axs[0].format(title="Vertical (ridgeline)", xlabel="Value") + +# Horizontal ridgeline (ridges are vertical) +axs[1].ridgelineh( + data2, labels=labels2, overlap=0.6, facecolor="skyblue", alpha=0.7, linewidth=1.5 +) +axs[1].format(title="Horizontal (ridgelineh)", ylabel="Value") + + +# %% [raw] raw_mimetype="text/restructuredtext" +# .. 
_ug_ridgeline_continuous: +# +# Continuous positioning +# ^^^^^^^^^^^^^^^^^^^^^^ +# +# For scientific applications, ridgeline plots can use continuous (coordinate-based) +# positioning where each ridge is anchored to a specific numerical coordinate along +# the axis. This is useful for visualizing how distributions change with physical +# variables like depth, time, altitude, or redshift. Use the `positions` parameter +# to specify coordinates, and optionally the `height` parameter to control ridge height +# in axis units. + +# %% +import numpy as np + +import ultraplot as uplt + +# Simulate ocean temperature data at different depths +state = np.random.RandomState(51423) +depths = [0, 10, 25, 50, 100] # meters +mean_temps = [25, 22, 18, 12, 8] # decreasing with depth +data = [state.normal(temp, 2, 400) for temp in mean_temps] +labels = ["Surface", "10m", "25m", "50m", "100m"] + +fig, ax = uplt.subplots(figsize=(8, 6)) +ax.ridgeline( + data, + labels=labels, + positions=depths, + height=8, # height in axis units + cmap="coolwarm", + alpha=0.75, + linewidth=2, +) +ax.format( + title="Ocean Temperature Distribution by Depth", + xlabel="Temperature (°C)", + ylabel="Depth (m)", + yreverse=True, # depth increases downward + grid=True, + gridcolor="gray5", + gridalpha=0.3, +) + +# %% +import numpy as np + +import ultraplot as uplt + +# Simulate climate data over time +state = np.random.RandomState(51423) +years = [1950, 1970, 1990, 2010, 2030] +mean_temps = [14.0, 14.2, 14.5, 15.0, 15.5] # warming trend +data = [state.normal(temp, 0.8, 500) for temp in mean_temps] + +fig, axs = uplt.subplots(ncols=2, figsize=(11, 5)) +axs.format(abc="A.", abcloc="ul", suptitle="Categorical vs Continuous positioning") + +# Categorical positioning (default) +axs[0].ridgeline( + data, labels=[str(y) for y in years], overlap=0.6, cmap="fire", alpha=0.7 +) +axs[0].format( + title="Categorical (traditional joyplot)", xlabel="Temperature (°C)", grid=False +) + +# Continuous positioning 
+axs[1].ridgeline( + data, + labels=[str(y) for y in years], + positions=years, + height=15, # height in year units + cmap="fire", + alpha=0.7, +) +axs[1].format( + title="Continuous (scientific)", + xlabel="Temperature (°C)", + ylabel="Year", + grid=True, + gridcolor="gray5", + gridalpha=0.3, +) diff --git a/ultraplot/axes/plot.py b/ultraplot/axes/plot.py index 515e83c08..267c7b185 100644 --- a/ultraplot/axes/plot.py +++ b/ultraplot/axes/plot.py @@ -5455,8 +5455,7 @@ def _apply_ridgeline( if facecolor is None: if cmap is not None: # Use colormap - if isinstance(cmap, str): - cmap = mcm.get_cmap(cmap) + cmap = constructor.Colormap(cmap) colors = [cmap(i / (n_ridges - 1)) for i in range(n_ridges)] else: # Use color cycle