From 7cadb41a75c52eefa187057479700f10e3551eeb Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sat, 20 Dec 2025 23:14:19 -0800 Subject: [PATCH 01/16] Basic draft of the video capture code. I'll probably break this into a simple and advanced version too. I may have to take out the audio code. This also currently uses some of my work in the (unmerged) feat-buffer branch, so I'll need to switch it to use what's available now. --- demos/video-capture.py | 199 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100755 demos/video-capture.py diff --git a/demos/video-capture.py b/demos/video-capture.py new file mode 100755 index 0000000..c8a565d --- /dev/null +++ b/demos/video-capture.py @@ -0,0 +1,199 @@ +#! /usr/bin/env python3 + +from fractions import Fraction +import queue +import threading +import time + +import av +import numpy as np +import soundcard as sc +from tqdm.auto import trange + +import mss + +CODEC_OPTIONS_GLOBAL = { + "g": "60", # GOP size: aim for about 2 sec + "bf": "2", # enable bframes + "b": "6M", # nominal average bitrate target + "maxrate": "12M", # peak + "bufsize": "24M", # VBV buffer; 1-4 seconds +} + +# Some options are, of course, implementation-dependent. I've +# tried to make these basically similar, but for all I know, they +# might actually produce significantly different output quality. +CODECS = { + "h264_nvenc": { + "rc": "vbr", + "tune": "hq", + "cq": "23", # quality; similar spirit to CRF, but different + # The modern presets are the p# ones. The others are + # deprecated, often aliases. + "preset": "p4", # p1..p7 (higher = slower/better) + "rc-lookahead": "40", + "spatial-aq": "1", + "temporal-aq": "1", + "b_ref_mode": "1", + }, + "libx264": { + # I think that with VBR enabled (as in the global options), + # libx264 ignores CRF. + "crf": "23", # quality; lower=better/larger + "preset": "medium", # speed/quality trade-off + "rc-lookahead": "40", + "aq-mode": "3", + }, +} + + +def main(): + av.logging.set_level(av.logging.VERBOSE) + + fps = 60 + monitor_id = 1 + duration_secs = 30 + codec = None + + if codec is None: + for codec in CODECS: + try: + # This normalizes the name. + av.codec.Codec(codec, "w") + break + except av.codec.codec.UnknownCodecError: + pass + else: + raise RuntimeError("No viable H.264 codec found") + else: + # Normalize the name, for the options lookup. + codec = av.codec.Codec(codec, "w").name + + mic = sc.get_microphone("loopback") + + with mss.mss() as sct: + monitor = sct.monitors[monitor_id] + + with av.open("capture.mp4", "w", format="mp4") as avmux: + time_denom = 90000 # This is a widely-used standard + time_base = Fraction(1, time_denom) + + audio_stream = avmux.add_stream("opus", options={"b": "64k"}) + audio_stream.time_base = time_base + # We pre-open the codec, to make sure there's not a warmup frame. + audio_stream.open() + + options = dict(CODEC_OPTIONS_GLOBAL) + if codec in CODECS: + options.update(CODECS[codec]) + video_stream = avmux.add_stream(codec, rate=fps, options=options) + video_stream.width = monitor["width"] + video_stream.height = monitor["height"] + video_stream.time_base = time_base + if any(f.name == "bgra" for f in video_stream.codec.video_formats): + video_stream.pix_fmt = "bgra" + # We pre-open the codec, to make sure there's not a warmup frame. 
+ video_stream.open() + + def pipeline(q_input, fn, q_output): + try: + while True: + try: + val_input = q_input.get(timeout=5) + except queue.ShutDown: + break + val_output = fn(val_input) + if q_output is not None: + q_output.put(val_output, timeout=5) + finally: + q_input.shutdown() + if q_output is not None: + q_output.shutdown() + + q_audio_preprocess = queue.Queue(1) + q_audio_encode = queue.Queue(1) + q_video_preprocess = queue.Queue(1) + q_video_encode = queue.Queue(1) + q_mux = queue.Queue(1) + + def video_capture(): + try: + next_frame_at = first_frame_at + for i in trange(duration_secs * fps): + while ((now := time.clock_gettime(time.CLOCK_MONOTONIC)) < next_frame_at): + time.sleep(next_frame_at - now) + # I think there's an easy way to make this a leaky bucket, but can't quite + # think through the math right now. + next_frame_at = next_frame_at + 1/fps + screenshot = sct.grab(monitor) + q_video_preprocess.put((screenshot, now), timeout=5) + finally: + q_video_preprocess.shutdown() + + def video_preprocess(screenshot_and_timestamp): + (screenshot, timestamp) = screenshot_and_timestamp + + ndarray = np.frombuffer(screenshot.buffer(), dtype=np.uint8) + ndarray = ndarray.reshape(monitor["height"], monitor["width"], 4) + # from_numpy_buffer isn't documented. from_ndarray is, + # but that copies the data. That's slow enough to + # slow things down to the point of being a bottleneck! + frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") + + frame.pts = int((timestamp - first_frame_at) * 90000) + frame.time_base = Fraction(1, 90000) + return frame + + video_encode = video_stream.encode + + def audio_preprocess(audio_and_timestamp): + (audio, timestamp) = audio_and_timestamp + audio = audio.reshape(1, -1) + frame = av.AudioFrame.from_ndarray(audio, format='flt', layout='stereo') + frame.sample_rate = 48000 + frame.pts = int((timestamp - first_frame_at) * 90000) + frame.time_base = Fraction(1, 90000) + return frame + + audio_encode = audio_stream.encode + + t_video_capture = threading.Thread(target=video_capture, name="video_capture") + t_video_preprocess = threading.Thread(target=pipeline, args=(q_video_preprocess, video_preprocess, q_video_encode), name="video_preprocess") + t_video_encode = threading.Thread(target=pipeline, args=(q_video_encode, video_encode, q_mux), name="video_encode") + t_audio_preprocess = threading.Thread(target=pipeline, args=(q_audio_preprocess, audio_preprocess, q_audio_encode), name="audio_preprocess") + t_audio_encode = threading.Thread(target=pipeline, args=(q_audio_encode, audio_encode, q_mux), name="audio_encode") + t_mux = threading.Thread(target=pipeline, args=(q_mux, avmux.mux, None), name="mux") + + first_frame_at = time.clock_gettime(time.CLOCK_MONOTONIC) + t_mux.start() + t_video_encode.start() + t_video_preprocess.start() + t_audio_encode.start() + t_audio_preprocess.start() + t_video_capture.start() + + print("Capture: ", t_video_capture.native_id) + print("Preprocess:", t_video_preprocess.native_id) + print("Encode: ", t_video_encode.native_id) + print("Mux: ", t_mux.native_id) + + with mic.recorder(samplerate=48000) as audio_recorder: + while t_video_capture.is_alive(): + data = audio_recorder.record() + now = time.clock_gettime(time.CLOCK_MONOTONIC) + timestamp = now - audio_recorder.latency + q_audio_preprocess.put((data, timestamp)) + + t_video_capture.join() + t_video_preprocess.join() + t_video_encode.join() + t_audio_preprocess.join() + t_audio_encode.join() + t_mux.join() + + print(f"Used format {video_stream.format}, " + 
f"reformatter {video_stream.reformatter}") + + +if __name__ == "__main__": + main() From ca91fe515b1d14a6b6a693e4129266526b071904 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sat, 10 Jan 2026 23:40:52 -0800 Subject: [PATCH 02/16] Work the video demo to a more viable form --- demos/common/__init__.py | 0 demos/common/pipeline.py | 300 ++++++++++++++++++++++ demos/tinytv-stream.py | 303 +--------------------- demos/video-capture.py | 524 ++++++++++++++++++++++++++------------- 4 files changed, 658 insertions(+), 469 deletions(-) create mode 100644 demos/common/__init__.py create mode 100644 demos/common/pipeline.py diff --git a/demos/common/__init__.py b/demos/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/demos/common/pipeline.py b/demos/common/pipeline.py new file mode 100644 index 0000000..6ae9d67 --- /dev/null +++ b/demos/common/pipeline.py @@ -0,0 +1,300 @@ +from __future__ import annotations + +import contextlib +import itertools +from collections.abc import Callable, Generator, Iterable, Iterator +from threading import Condition, Lock, Thread +from typing import Generic, TypeVar, overload + +T = TypeVar("T") +U = TypeVar("U") + + +class MailboxShutDown(Exception): # noqa: N818 (An exception, but not an error) + """Exception to indicate that a Mailbox has been shut down. + + This will be raised if Mailbox.get() or Mailbox.put() is run on a + mailbox after its .shutdown() method has been called, or if it is + called while waiting. + """ + + def __init__(self, mailbox: Mailbox) -> None: + #: The mailbox that was shut down + self.mailbox = mailbox + + def __str__(self) -> str: + return f"Mailbox shut down: {self.mailbox}" + + +class Mailbox(Generic[T]): + """Thread-safe container to pass a single object at a time between threads. + + A Mailbox can be shut down to indicate that it is no longer + available. This can be used by a producer to indicate that no + more items will be forthcoming, or by a consumer to indicate that + it is no longer able to accept more objects. + + In Python 3.13, this has the same basic functionality as + queue.Queue(1). Prior to 3.13, there was no + queue.Queue.shutdown() method. The mechanisms for using mailboxes + as iterables, or adding items from iterables, are also not part of + queue.Queue in any version of Python. + """ + + def __init__(self) -> None: + #: Lock to protect mailbox state + self.lock = Lock() + self._condition = Condition(lock=self.lock) + #: Indicates whether an item is present in the mailbox + self.has_item = False + self._item: T | None = None + #: Indicates whether the mailbox has been shut down + self.is_shutdown = False + + def get(self) -> T: + """Return and remove the item being held by the mailbox. + + If an item is not presently available, block until another + thread calls .put(). + """ + with self._condition: + while True: + # We test to see if an item is present before testing if the queue is shut down. This is so that a + # non-immediate shutdown allows the mailbox to be drained. + if self.has_item: + rv = self._item + self._item = None # Don't hold an unnecessary reference + self.has_item = False + self._condition.notify_all() + return rv # type:ignore[return-value] + if self.is_shutdown: + raise MailboxShutDown(self) + self._condition.wait() + + def get_many(self) -> Iterable[T]: + """Yield items as they appear in the mailbox. + + The iterator exits the mailbox is shut down; MailboxShutDown + is not raised into the caller. 
+ """ + return iter(self) + + def put(self, item: T) -> None: + """Store an item in the mailbox. + + If an item is already in the mailbox, block until another + thread calls .get(). + """ + with self._condition: + while True: + if self.is_shutdown: + raise MailboxShutDown(self) + if not self.has_item: + self._item = item + self.has_item = True + self._condition.notify() + return + self._condition.wait() + + def put_many(self, items: Iterable[T]) -> Iterator[T]: + """Put the elements of iterable in the mailbox, one at a time. + + If the mailbox is shut down before all the elements can be put + into it, a MailboxShutDown exception is _not_ raised. + + Returns an iterator containing any remaining items, including + the one that was being processed when the mailbox was shut + down. The first item (if any) of this iterator can be + immediately accessed with next; subsequent items defer to the + input iterable, so may block. + """ + iterator = iter(items) + for item in iterator: + # We put this try/except inside the for loop, to make sure we don't accidentally filter out an exception + # that escaped the items iterator. + try: + self.put(item) + except MailboxShutDown: + return itertools.chain([item], iterator) + # Remove references to the value once it's not needed. This lets objects with advanced buffer semantics + # reclaim the object's memory immediately, without waiting for the next iteration of the iterable. + del item + return iter([]) + + def shutdown(self, *, immediate: bool = False) -> None: + """Shut down the mailbox, marking it as unavailable for future use. + + Any callers currently blocked in .get or .put, or any future + caller to those methods, will recieve a MailboxShutDown + exception. Callers using .get_many or iterating over the + mailbox will see the iteration end. Callers to .put_many will + stop adding items. + + If immediate is False (the default), and an item is currently + in the mailbox, it will be returned by the next call to + .get(), and the one after that will raise MailboxShutDown. + + It is safe to call this method multiple times, including to + promote a non-immediate shutdown to an immediate one. + """ + with self._condition: + # We don't actually need to check whether we've been called already. + self.is_shutdown = True + if immediate: + self._item = None + self.has_item = False + self._condition.notify_all() + + def __iter__(self) -> Iterator[T]: + """Yield items as they appear in the mailbox. + + The iterator exits when the mailbox is shut down; + MailboxShutDown is not raised into the caller. + """ + with contextlib.suppress(MailboxShutDown): + while True: + yield self.get() + + +class PipelineStage(Thread, Generic[T, U]): + """A stage of a multi-threaded pipeline. + + The target function will be called once, and should yield one + value for each element. + + If an in_mailbox is provided, the function will get an iterable of + its successive elements. If an out_mailbox is provided, it will + be supplied with the successive outputs of the target function. + + If the either mailbox is shut down, the target function's loop + will stop being called. Both mailboxes will be shut down when the + target function ends. + + Note to readers adapting this class to their own programs: + + This is designed for linear pipelines: it is not meant to support + fan-in (multiple stages feeding one mailbox) or fan-out (one + mailbox feeding multiple stages). 
The shutdown semantics of these + sorts of pipelines will depend heavily on what it's used for, and + this demo only needs a simple pipeline. + """ + + # Source stage + @overload + def __init__( + self, + target: Callable[[], Generator[U]], + *, + out_mailbox: Mailbox[U], + name: str | None = None, + ) -> None: ... + + # Transformer stage + @overload + def __init__( + self, + target: Callable[[Iterable[T]], Generator[U]], + *, + in_mailbox: Mailbox[T], + out_mailbox: Mailbox[U], + name: str | None = None, + ) -> None: ... + + # Sink stage + @overload + def __init__( + self, + target: Callable[[Iterable[T]], None], + *, + in_mailbox: Mailbox[T], + name: str | None = None, + ) -> None: ... + + def __init__( + self, + target: Callable[[], Generator[U]] | Callable[[Iterable[T]], Generator[U]] | Callable[[Iterable[T]], None], + *, + in_mailbox: Mailbox[T] | None = None, + out_mailbox: Mailbox[U] | None = None, + name: str | None = None, + ) -> None: + """Initialize the PipelineStage. + + Either :param:`in_mailbox` or :param:`out_mailbox` is + required. Otherwise, it would be a pipeline stage that can't + connect to anything else. (You can always use + :class:`threading.Thread` directly if you need that behavior.) + + :param target: Function to run during the stage. This will be + called once, in a separate thread. This should take one + argument if :param:`in_mailbox` is provided, or no + arguments otherwise. If you want additional arguments + (such as configuration), use :func:`functools.partial`. + :param in_mailbox: An optional :class:`Mailbox` to provide + inputs to the target function. The target function will + be called with one argument, an iterable that you can use + in a for loop or similar construct, to get the successive + values. + :param out_mailbox: An optional :class:`Mailbox` to receive + outputs from the target function. If this is provided, + the target function must be a generator (a function that + uses ``yield`` instead of ``return``). The successive + outputs from the function will be placed in + :param:`out_mailbox`. + :param name: An optional name for debugging purposes; see + :attr:`threading.Thread.name`. + """ + if in_mailbox is None and out_mailbox is None: + msg = "Cannot have a pipeline stage with neither inputs nor outputs" + raise ValueError(msg) + self.in_mailbox = in_mailbox + self.out_mailbox = out_mailbox + self.target = target + #: The exception (if any) raised by the target function + self.exc: Exception | None = None + super().__init__(name=name, daemon=True) + + def run(self) -> None: + """Execute the pipeline stage. + + This should not be run directly. Instead, use the start() + method (inherited from threading.Thread) to run this in a + background thread. + + This will run the target function, managing input and output + mailboxes. When the stage completes, whether normally or with + an error, the mailboxes will be shut down. + """ + try: + if self.out_mailbox is None: + # This is a sink function, the easiest to deal with. Since a mailbox is iterable, we can just pass it + # to the target function. + assert self.in_mailbox is not None # noqa: S101 + self.target(self.in_mailbox) # type:ignore[call-arg] + return + # This is a source or transformation function. + out_iterable = self.target() if self.in_mailbox is None else self.target(self.in_mailbox) # type:ignore[call-arg] + if not isinstance(out_iterable, Generator): + msg = ( + "Pipeline target function was expected to be a generator; " + f"instead, it returned a {type(out_iterable)}." 
+ ) + raise TypeError(msg) # noqa: TRY301 + # Once a generator is closed, the yield call (where they block when they send an object downstream) will + # raise GeneratorExit. That lets finally: blocks, with: exits, etc. run. This happens automatically when + # out_iterable is garbage-collected. We still close it explicitly to so it gets the GeneratorExit, in case + # something (like an exception object) is holding a reference to out_iterable. + with contextlib.closing(out_iterable): + self.out_mailbox.put_many(out_iterable) + except Exception as e: + # We store the exception, so that our caller can choose what to do about it after they call join. + self.exc = e + raise + finally: + if self.in_mailbox is not None: + self.in_mailbox.shutdown() + if self.out_mailbox is not None: + self.out_mailbox.shutdown() + + def __str__(self) -> str: + return f"" diff --git a/demos/tinytv-stream.py b/demos/tinytv-stream.py index dcbcb18..a399389 100755 --- a/demos/tinytv-stream.py +++ b/demos/tinytv-stream.py @@ -132,19 +132,15 @@ from __future__ import annotations import argparse -import contextlib import functools import io -import itertools import logging import os import re import sys import time from collections import deque -from collections.abc import Generator, Iterable, Iterator -from threading import Condition, Lock, Thread -from typing import TYPE_CHECKING, Generic, Literal, TypeVar, overload +from typing import TYPE_CHECKING, Literal import serial from PIL import Image, ImageOps @@ -153,8 +149,10 @@ import mss +from common.pipeline import Mailbox, PipelineStage + if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Generator, Iterable # The keys in this are substrings in the tvType query. Make sure that they're all distinct: having both "TinyTV2" and # "TinyTV2.1" in here would mean that a 2.1 might be misidentified as a 2. We use substrings instead of parsing the @@ -191,302 +189,9 @@ DEFAULT_JPEG_QUALITY = 75 -T = TypeVar("T") -U = TypeVar("U") - LOGGER = logging.getLogger("tinytv-stream") -class MailboxShutDown(Exception): # noqa: N818 (An exception, but not an error) - """Exception to indicate that a Mailbox has been shut down. - - This will be raised if Mailbox.get() or Mailbox.put() is run on a - mailbox after its .shutdown() method has been called, or if it is - called while waiting. - """ - - def __init__(self, mailbox: Mailbox) -> None: - #: The mailbox that was shut down - self.mailbox = mailbox - - def __str__(self) -> str: - return f"Mailbox shut down: {self.mailbox}" - - -class Mailbox(Generic[T]): - """Thread-safe container to pass a single object at a time between threads. - - A Mailbox can be shut down to indicate that it is no longer - available. This can be used by a producer to indicate that no - more items will be forthcoming, or by a consumer to indicate that - it is no longer able to accept more objects. - - In Python 3.13, this has the same basic functionality as - queue.Queue(1). Prior to 3.13, there was no - queue.Queue.shutdown() method. The mechanisms for using mailboxes - as iterables, or adding items from iterables, are also not part of - queue.Queue in any version of Python. 
- """ - - def __init__(self) -> None: - #: Lock to protect mailbox state - self.lock = Lock() - self._condition = Condition(lock=self.lock) - #: Indicates whether an item is present in the mailbox - self.has_item = False - self._item: T | None = None - #: Indicates whether the mailbox has been shut down - self.is_shutdown = False - - def get(self) -> T: - """Return and remove the item being held by the mailbox. - - If an item is not presently available, block until another - thread calls .put(). - """ - with self._condition: - while True: - # We test to see if an item is present before testing if the queue is shut down. This is so that a - # non-immediate shutdown allows the mailbox to be drained. - if self.has_item: - rv = self._item - self._item = None # Don't hold an unnecessary reference - self.has_item = False - self._condition.notify_all() - return rv # type:ignore[return-value] - if self.is_shutdown: - raise MailboxShutDown(self) - self._condition.wait() - - def get_many(self) -> Iterable[T]: - """Yield items as they appear in the mailbox. - - The iterator exits the mailbox is shut down; MailboxShutDown - is not raised into the caller. - """ - return iter(self) - - def put(self, item: T) -> None: - """Store an item in the mailbox. - - If an item is already in the mailbox, block until another - thread calls .get(). - """ - with self._condition: - while True: - if self.is_shutdown: - raise MailboxShutDown(self) - if not self.has_item: - self._item = item - self.has_item = True - self._condition.notify() - return - self._condition.wait() - - def put_many(self, items: Iterable[T]) -> Iterator[T]: - """Put the elements of iterable in the mailbox, one at a time. - - If the mailbox is shut down before all the elements can be put - into it, a MailboxShutDown exception is _not_ raised. - - Returns an iterator containing any remaining items, including - the one that was being processed when the mailbox was shut - down. The first item (if any) of this iterator can be - immediately accessed with next; subsequent items defer to the - input iterable, so may block. - """ - iterator = iter(items) - for item in iterator: - # We put this try/except inside the for loop, to make sure we don't accidentally filter out an exception - # that escaped the items iterator. - try: - self.put(item) - except MailboxShutDown: - return itertools.chain([item], iterator) - # Remove references to the value once it's not needed. This lets objects with advanced buffer semantics - # reclaim the object's memory immediately, without waiting for the next iteration of the iterable. - del item - return iter([]) - - def shutdown(self, *, immediate: bool = False) -> None: - """Shut down the mailbox, marking it as unavailable for future use. - - Any callers currently blocked in .get or .put, or any future - caller to those methods, will recieve a MailboxShutDown - exception. Callers using .get_many or iterating over the - mailbox will see the iteration end. Callers to .put_many will - stop adding items. - - If immediate is False (the default), and an item is currently - in the mailbox, it will be returned by the next call to - .get(), and the one after that will raise MailboxShutDown. - - It is safe to call this method multiple times, including to - promote a non-immediate shutdown to an immediate one. - """ - with self._condition: - # We don't actually need to check whether we've been called already. 
- self.is_shutdown = True - if immediate: - self._item = None - self.has_item = False - self._condition.notify_all() - - def __iter__(self) -> Iterator[T]: - """Yield items as they appear in the mailbox. - - The iterator exits when the mailbox is shut down; - MailboxShutDown is not raised into the caller. - """ - with contextlib.suppress(MailboxShutDown): - while True: - yield self.get() - - -class PipelineStage(Thread, Generic[T, U]): - """A stage of a multi-threaded pipeline. - - The target function will be called once, and should yield one - value for each element. - - If an in_mailbox is provided, the function will get an iterable of - its successive elements. If an out_mailbox is provided, it will - be supplied with the successive outputs of the target function. - - If the either mailbox is shut down, the target function's loop - will stop being called. Both mailboxes will be shut down when the - target function ends. - - Note to readers adapting this class to their own programs: - - This is designed for linear pipelines: it is not meant to support - fan-in (multiple stages feeding one mailbox) or fan-out (one - mailbox feeding multiple stages). The shutdown semantics of these - sorts of pipelines will depend heavily on what it's used for, and - this demo only needs a simple pipeline. - """ - - # Source stage - @overload - def __init__( - self, - target: Callable[[], Generator[U]], - *, - out_mailbox: Mailbox[U], - name: str | None = None, - ) -> None: ... - - # Transformer stage - @overload - def __init__( - self, - target: Callable[[Iterable[T]], Generator[U]], - *, - in_mailbox: Mailbox[T], - out_mailbox: Mailbox[U], - name: str | None = None, - ) -> None: ... - - # Sink stage - @overload - def __init__( - self, - target: Callable[[Iterable[T]], None], - *, - in_mailbox: Mailbox[T], - name: str | None = None, - ) -> None: ... - - def __init__( - self, - target: Callable[[], Generator[U]] | Callable[[Iterable[T]], Generator[U]] | Callable[[Iterable[T]], None], - *, - in_mailbox: Mailbox[T] | None = None, - out_mailbox: Mailbox[U] | None = None, - name: str | None = None, - ) -> None: - """Initialize the PipelineStage. - - Either :param:`in_mailbox` or :param:`out_mailbox` is - required. Otherwise, it would be a pipeline stage that can't - connect to anything else. (You can always use - :class:`threading.Thread` directly if you need that behavior.) - - :param target: Function to run during the stage. This will be - called once, in a separate thread. This should take one - argument if :param:`in_mailbox` is provided, or no - arguments otherwise. If you want additional arguments - (such as configuration), use :func:`functools.partial`. - :param in_mailbox: An optional :class:`Mailbox` to provide - inputs to the target function. The target function will - be called with one argument, an iterable that you can use - in a for loop or similar construct, to get the successive - values. - :param out_mailbox: An optional :class:`Mailbox` to receive - outputs from the target function. If this is provided, - the target function must be a generator (a function that - uses ``yield`` instead of ``return``). The successive - outputs from the function will be placed in - :param:`out_mailbox`. - :param name: An optional name for debugging purposes; see - :attr:`threading.Thread.name`. 
- """ - if in_mailbox is None and out_mailbox is None: - msg = "Cannot have a pipeline stage with neither inputs nor outputs" - raise ValueError(msg) - self.in_mailbox = in_mailbox - self.out_mailbox = out_mailbox - self.target = target - #: The exception (if any) raised by the target function - self.exc: Exception | None = None - super().__init__(name=name, daemon=True) - - def run(self) -> None: - """Execute the pipeline stage. - - This should not be run directly. Instead, use the start() - method (inherited from threading.Thread) to run this in a - background thread. - - This will run the target function, managing input and output - mailboxes. When the stage completes, whether normally or with - an error, the mailboxes will be shut down. - """ - try: - if self.out_mailbox is None: - # This is a sink function, the easiest to deal with. Since a mailbox is iterable, we can just pass it - # to the target function. - assert self.in_mailbox is not None # noqa: S101 - self.target(self.in_mailbox) # type:ignore[call-arg] - return - # This is a source or transformation function. - out_iterable = self.target() if self.in_mailbox is None else self.target(self.in_mailbox) # type:ignore[call-arg] - if not isinstance(out_iterable, Generator): - msg = ( - "Pipeline target function was expected to be a generator; " - f"instead, it returned a {type(out_iterable)}." - ) - raise TypeError(msg) # noqa: TRY301 - # Once a generator is closed, the yield call (where they block when they send an object downstream) will - # raise GeneratorExit. That lets finally: blocks, with: exits, etc. run. This happens automatically when - # out_iterable is garbage-collected. We still close it explicitly to so it gets the GeneratorExit, in case - # something (like an exception object) is holding a reference to out_iterable. - with contextlib.closing(out_iterable): - self.out_mailbox.put_many(out_iterable) - except Exception as e: - # We store the exception, so that our caller can choose what to do about it after they call join. - self.exc = e - raise - finally: - if self.in_mailbox is not None: - self.in_mailbox.shutdown() - if self.out_mailbox is not None: - self.out_mailbox.shutdown() - - def __str__(self) -> str: - return f"" - - def list_devices() -> None: """Display all USB serial ports in a formatted table.""" ports = list(list_ports.comports()) diff --git a/demos/video-capture.py b/demos/video-capture.py index c8a565d..b048ee9 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -1,198 +1,382 @@ #! /usr/bin/env python3 -from fractions import Fraction -import queue -import threading +# In one test, here's some numbers this program could achieve. This +# is just meant as a rough guide; your results will almost certainly +# vary significantly. 
+# - libx264, 1920x1080: 80 fps +# - libx264, 3840x2160: 18 fps +# - h264_nvenc, 1920x1080: 190 fps +# - h264_nvenc, 3840x2160: 41 fps + +import argparse +import logging +import signal import time +from collections import deque +from collections.abc import Generator, Iterable, Sequence +from fractions import Fraction +from functools import partial +from math import floor +from threading import Event +from typing import Any import av import numpy as np -import soundcard as sc -from tqdm.auto import trange +from common.pipeline import Mailbox, PipelineStage +from si_prefix import si_format import mss -CODEC_OPTIONS_GLOBAL = { - "g": "60", # GOP size: aim for about 2 sec - "bf": "2", # enable bframes - "b": "6M", # nominal average bitrate target - "maxrate": "12M", # peak - "bufsize": "24M", # VBV buffer; 1-4 seconds -} +# These are the options you'd give to ffmpeg that would affect the +# video codec. +CODEC_OPTIONS = { + # The "high" profile means that the encoder can use some H.264 + # features that are widely supported, but not mandatory. + "profile": "high", + + # The "medium" preset is as good of a preset as any for a demo + # like this. Different codecs have different presets; the the + # h264_nvenc actually prefers "p4", but accepts "medium" as a + # similar preset. + "preset": "medium", -# Some options are, of course, implementation-dependent. I've -# tried to make these basically similar, but for all I know, they -# might actually produce significantly different output quality. -CODECS = { - "h264_nvenc": { - "rc": "vbr", - "tune": "hq", - "cq": "23", # quality; similar spirit to CRF, but different - # The modern presets are the p# ones. The others are - # deprecated, often aliases. - "preset": "p4", # p1..p7 (higher = slower/better) - "rc-lookahead": "40", - "spatial-aq": "1", - "temporal-aq": "1", - "b_ref_mode": "1", - }, - "libx264": { - # I think that with VBR enabled (as in the global options), - # libx264 ignores CRF. - "crf": "23", # quality; lower=better/larger - "preset": "medium", # speed/quality trade-off - "rc-lookahead": "40", - "aq-mode": "3", - }, + # 6 Mbit/sec is vaguely the ballpark for a good-quality video at + # 1080p and 30 fps, but there's a lot of variation. We're just + # giving the target bitrate: the second-to-second bitrate will + # vary a lot, and slowly approach this bitrate. If you're trying + # this on a nearly-still screen, though, then the actual bitrate + # will be much lower, since there's not much motion to encode! + "b": "6M", + + # Let the encoder hold some frames for analysis, and flush them + # later. This especially helps with the hardware-accelerated + # codecs. + "rc-lookahead": "40", } +# There are a lot of different places in a video encoding pipeline +# where time_base matters, and they don't necessarily have to be the +# same, so the time base has to be set on several objects. In this +# program, we do use a common time base of 1/90000 seconds everywhere. +# This is a common standard, from the MPEG world. +TIME_BASE = Fraction(1, 90000) -def main(): - av.logging.set_level(av.logging.VERBOSE) +LOGGER = logging.getLogger("video-capture") - fps = 60 - monitor_id = 1 - duration_secs = 30 - codec = None - - if codec is None: - for codec in CODECS: - try: - # This normalizes the name. - av.codec.Codec(codec, "w") - break - except av.codec.codec.UnknownCodecError: - pass - else: - raise RuntimeError("No viable H.264 codec found") - else: - # Normalize the name, for the options lookup. 
- codec = av.codec.Codec(codec, "w").name +def video_capture( + fps: int, + sct: mss.base.MSSBase, + monitor: mss.models.Monitor, + shutdown_requested: Event, +) -> Generator[tuple[mss.screenshot.ScreenShot, float], None, None]: + next_frame_at = time.monotonic() + capture_period = 1 / fps + while not shutdown_requested.is_set(): + # Wait until we're ready. + while (now := time.monotonic()) < next_frame_at: + time.sleep(next_frame_at - now) - mic = sc.get_microphone("loopback") + # Capture and yield a frame. + screenshot = sct.grab(monitor) + yield screenshot, now - with mss.mss() as sct: - monitor = sct.monitors[monitor_id] + # We try to keep the capture rate at the desired fps on + # average. If we can't quite keep up for a moment (such as if + # the computer is a little overloaded), then we'll accumulate + # a bit of "timing debt" in next_frame_at: it'll be a little + # sooner than now + one frame. We'll hopefully be able to + # catch up soon. + next_frame_at = next_frame_at + capture_period + + # If we've accumulated over one frame's worth of catch-up, + # then that will say that next_frame_at is sooner than now. + # If we're accumulating too much debt, we want to wipe it out, + # rather than having a huge burst of closely-spaced captures + # as soon as we can get back to our desired capture rate. + # When we wipe that out, we still try to preserve the timing + # cycle's phase to keep the capture cadence smooth, rather + # than having a jittery burst of closely-spaced captures. In + # other words, we increment next_frame_at by a multiple of the + # desired capture period. + if next_frame_at < now: + missed_frames = floor((now - next_frame_at) * fps) + next_frame_at += (missed_frames + 1) * capture_period + + +def video_process( + screenshot_and_timestamp: Iterable[ + tuple[mss.screenshot.ScreenShot, float] + ], +) -> Generator[av.VideoFrame, None, None]: + first_frame_at: float | None = None + for screenshot, timestamp in screenshot_and_timestamp: + ndarray = np.frombuffer(screenshot.bgra, dtype=np.uint8) + ndarray = ndarray.reshape(screenshot.height, screenshot.width, 4) + # from_numpy_buffer isn't documented. from_ndarray is, but + # that copies the data. That's slow enough to slow things + # down to the point of being a real bottleneck! + frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") + if first_frame_at is None: + first_frame_at = timestamp + frame.pts = int((timestamp - first_frame_at) / TIME_BASE) + frame.time_base = TIME_BASE + yield frame + + +def video_encode( + video_stream: av.video.stream.VideoStream, frames: Iterable[av.VideoFrame] +) -> Generator[Sequence[av.Packet], None, None]: + for frame in frames: + yield video_stream.encode(frame) + # Our input has run out. Flush the frames that the encoder still + # is holding internally (such as to compute B-frames). + yield video_stream.encode(None) + + +def show_stats(packet_batches: Iterable[Sequence[av.Packet]]) -> Iterable[Sequence[av.Packet]]: + """Display streaming statistics (FPS and throughput). + + Statistics are displayed over a 100-frame sliding window. + + FPS indicates how fast the entire pipeline can run as a whole, not + any individual stage. + """ + # The start time is only used for showing the clock. The actual + # timing stats all use the times we put in the captured frames. 
+ start_time = time.monotonic() + time_deque: deque[int] = deque(maxlen=100) + bit_count_deque: deque[int] = deque(maxlen=100) + next_display_update = 0.0 + last_status_len = 0 + + for frame_count, packet_batch in enumerate(packet_batches): + # Yield the packet data immediately, so the mux gets it as + # soon as possible, while we update our stats. + yield packet_batch - with av.open("capture.mp4", "w", format="mp4") as avmux: - time_denom = 90000 # This is a widely-used standard - time_base = Fraction(1, time_denom) + for packet in packet_batch: + # The PTS would make more sense for logging FPS than the + # DTS, but because of frame reordering, it makes the stats + # unstable. Using DTS consistently makes the timing quite + # stable, and over the 100-frame window, still quite + # precise. + time_deque.append(packet.dts) + bit_count = packet.size * 8 + bit_count_deque.append(bit_count) - audio_stream = avmux.add_stream("opus", options={"b": "64k"}) - audio_stream.time_base = time_base - # We pre-open the codec, to make sure there's not a warmup frame. - audio_stream.open() + now = time.monotonic() + if now >= next_display_update and len(time_deque) > 1: + next_display_update = now + 0.1 + running_time = now - start_time + running_minutes = int(running_time / 60) + running_seconds = int(running_time % 60) + window_secs = (time_deque[-1] - time_deque[0]) * TIME_BASE + # We can't use the last frame in the window when we divide + # by window_secs; that would be a fencepost error. + window_frames = len(time_deque) - 1 + window_bits = sum(bit_count_deque) - bit_count_deque[-1] + fps = window_frames / window_secs + bits_per_sec = int(window_bits / window_secs) + line = (f"{running_minutes:02d}:{running_seconds:02d} " + f"frame {frame_count}: {fps:.2f} fps, " + f"{si_format(bits_per_sec, precision=2)}bps") + this_status_len = len(line) + full_line = f"\r{line}{' ' * (last_status_len - this_status_len)}" + print(full_line, end="") + last_status_len = this_status_len + # It's difficult to correctly print the fps and bitrate near the + # tail, since we get the last many frames as a big batch. Instead + # of leaving misleading information on the screen, we erase the + # status display. + print(f"\r{' ' * last_status_len}\r", end="") - options = dict(CODEC_OPTIONS_GLOBAL) - if codec in CODECS: - options.update(CODECS[codec]) - video_stream = avmux.add_stream(codec, rate=fps, options=options) + +def mux(avmux: av.container.OutputContainer, packet_batches: Iterable[Sequence[av.Packet]]) -> None: + for packet_batch in packet_batches: + avmux.mux(packet_batch) + + +def parse_region(s: str) -> tuple[int, int, int, int]: + """Parse comma-separated region string into (left, top, right, bottom).""" + parts = s.split(",") + if len(parts) != 4: + msg = "region must be four comma-separated integers" + raise argparse.ArgumentTypeError(msg) + try: + return tuple(int(p.strip()) for p in parts) # type: ignore[return-value] + except ValueError as e: + msg = "region values must be integers" + raise argparse.ArgumentTypeError(msg) from e + + +def main() -> None: + logging.basicConfig(level=logging.DEBUG) + # If we don't enable PyAV's own logging, a lot of important error + # messages from libav won't be shown. 
+ av.logging.set_level(av.logging.VERBOSE) + + parser = argparse.ArgumentParser( + description="Capture screen video to MP4 file" + ) + parser.add_argument( + "--fps", + type=int, + default=30, + help="frames per second (default: 30)" + ) + monitor_group = parser.add_mutually_exclusive_group() + monitor_group.add_argument( + "--monitor", + type=int, + default=1, + help="monitor ID to capture (default: 1)" + ) + monitor_group.add_argument( + "--region", + type=parse_region, + metavar="LEFT,TOP,RIGHT,BOTTOM", + help="region to capture as comma-separated coordinates" + ) + parser.add_argument( + "--codec", + default="libx264", + help="video codec (default: libx264; try h264_nvenc for Nvidia hardware encoding)" + ) + parser.add_argument( + "--output", + default="capture.mp4", + help="output filename (default: capture.mp4)" + ) + args = parser.parse_args() + + fps = args.fps + codec = args.codec + filename = args.output + + with mss.mss() as sct: + if args.region: + left, top, right, bottom = args.region + monitor = { + "left": left, + "top": top, + "width": right - left, + "height": bottom - top, + } + else: + monitor = sct.monitors[args.monitor] + + with av.open(filename, "w") as avmux: + # We could initialize video_stream in video_encode, but + # doing it here means that we can open it before starting + # the capture thread, which avoids a warmup frame (one + # that takes longer to encode because the encoder is just + # starting). + # + # The rate= parameter here is just the nominal frame rate: + # some tools (like file browsers) might display this as + # the frame rate. But we actually control timing via the + # pts and time_base values on the frames themselves. + video_stream = avmux.add_stream( + codec, rate=fps, options=CODEC_OPTIONS + ) video_stream.width = monitor["width"] video_stream.height = monitor["height"] - video_stream.time_base = time_base + # Setting the time_base on the stream is possible, but + # isn't what we need (for reasons I'm unclear on): we need + # to set it on the codec context. + video_stream.codec_context.time_base = TIME_BASE + # Assigning the pix_fmt is telling the video encoder what + # we'll be sending it, not necessarily what it will + # output. If the codec supports BGRx inputs, then that's + # the most efficient way for us to send it our frames. + # Otherwise, there will be a software, CPU-side conversion + # step when we send it our BGRx frames. We're actually + # probably sending it frames in BGR0, not BGRA, but PyAV + # doesn't support reading frames in BGR0, only BGRA. + # H.264 doesn't support an alpha channel anyway, so we can + # just send it BGR0 frames and tell it they're BGRA. if any(f.name == "bgra" for f in video_stream.codec.video_formats): video_stream.pix_fmt = "bgra" - # We pre-open the codec, to make sure there's not a warmup frame. + # We open (initialize) the codec explicitly here. PyAV + # will automatically open it the first time we call + # video_stream.encode, but the time it takes to set the + # codec up means the first frame would be particularly + # slow. 
video_stream.open() - def pipeline(q_input, fn, q_output): - try: - while True: - try: - val_input = q_input.get(timeout=5) - except queue.ShutDown: - break - val_output = fn(val_input) - if q_output is not None: - q_output.put(val_output, timeout=5) - finally: - q_input.shutdown() - if q_output is not None: - q_output.shutdown() - - q_audio_preprocess = queue.Queue(1) - q_audio_encode = queue.Queue(1) - q_video_preprocess = queue.Queue(1) - q_video_encode = queue.Queue(1) - q_mux = queue.Queue(1) - - def video_capture(): - try: - next_frame_at = first_frame_at - for i in trange(duration_secs * fps): - while ((now := time.clock_gettime(time.CLOCK_MONOTONIC)) < next_frame_at): - time.sleep(next_frame_at - now) - # I think there's an easy way to make this a leaky bucket, but can't quite - # think through the math right now. - next_frame_at = next_frame_at + 1/fps - screenshot = sct.grab(monitor) - q_video_preprocess.put((screenshot, now), timeout=5) - finally: - q_video_preprocess.shutdown() - - def video_preprocess(screenshot_and_timestamp): - (screenshot, timestamp) = screenshot_and_timestamp - - ndarray = np.frombuffer(screenshot.buffer(), dtype=np.uint8) - ndarray = ndarray.reshape(monitor["height"], monitor["width"], 4) - # from_numpy_buffer isn't documented. from_ndarray is, - # but that copies the data. That's slow enough to - # slow things down to the point of being a bottleneck! - frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") - - frame.pts = int((timestamp - first_frame_at) * 90000) - frame.time_base = Fraction(1, 90000) - return frame - - video_encode = video_stream.encode - - def audio_preprocess(audio_and_timestamp): - (audio, timestamp) = audio_and_timestamp - audio = audio.reshape(1, -1) - frame = av.AudioFrame.from_ndarray(audio, format='flt', layout='stereo') - frame.sample_rate = 48000 - frame.pts = int((timestamp - first_frame_at) * 90000) - frame.time_base = Fraction(1, 90000) - return frame - - audio_encode = audio_stream.encode - - t_video_capture = threading.Thread(target=video_capture, name="video_capture") - t_video_preprocess = threading.Thread(target=pipeline, args=(q_video_preprocess, video_preprocess, q_video_encode), name="video_preprocess") - t_video_encode = threading.Thread(target=pipeline, args=(q_video_encode, video_encode, q_mux), name="video_encode") - t_audio_preprocess = threading.Thread(target=pipeline, args=(q_audio_preprocess, audio_preprocess, q_audio_encode), name="audio_preprocess") - t_audio_encode = threading.Thread(target=pipeline, args=(q_audio_encode, audio_encode, q_mux), name="audio_encode") - t_mux = threading.Thread(target=pipeline, args=(q_mux, avmux.mux, None), name="mux") - - first_frame_at = time.clock_gettime(time.CLOCK_MONOTONIC) - t_mux.start() - t_video_encode.start() - t_video_preprocess.start() - t_audio_encode.start() - t_audio_preprocess.start() - t_video_capture.start() - - print("Capture: ", t_video_capture.native_id) - print("Preprocess:", t_video_preprocess.native_id) - print("Encode: ", t_video_encode.native_id) - print("Mux: ", t_mux.native_id) - - with mic.recorder(samplerate=48000) as audio_recorder: - while t_video_capture.is_alive(): - data = audio_recorder.record() - now = time.clock_gettime(time.CLOCK_MONOTONIC) - timestamp = now - audio_recorder.latency - q_audio_preprocess.put((data, timestamp)) - - t_video_capture.join() - t_video_preprocess.join() - t_video_encode.join() - t_audio_preprocess.join() - t_audio_encode.join() - t_mux.join() - - print(f"Used format {video_stream.format}, " - 
f"reformatter {video_stream.reformatter}") + shutdown_requested = Event() + def sigint_handler(_signum: int, _frame: Any) -> None: + # The status line will typically be visible, so start + # a fresh line for this message. + print("\nShutting down") + shutdown_requested.set() + signal.signal(signal.SIGINT, sigint_handler) + + mailbox_screenshot: Mailbox[ + tuple[mss.screenshot.ScreenShot, float] + ] = Mailbox() + mailbox_frame: Mailbox[av.VideoFrame] = Mailbox() + mailbox_packet_to_stats: Mailbox[Sequence[av.Packet]] = Mailbox() + mailbox_packet_to_mux: Mailbox[Sequence[av.Packet]] = Mailbox() + + stage_video_capture = PipelineStage( + name="video_capture", + target=partial( + video_capture, + fps, + sct, + monitor, + shutdown_requested, + ), + out_mailbox=mailbox_screenshot, + ) + stage_video_process = PipelineStage( + name="video_process", + in_mailbox=mailbox_screenshot, + target=partial(video_process), + out_mailbox=mailbox_frame, + ) + stage_video_encode = PipelineStage( + name="video_encode", + in_mailbox=mailbox_frame, + target=partial(video_encode, video_stream), + out_mailbox=mailbox_packet_to_stats, + ) + stage_show_stats = PipelineStage( + name="show_stats", + in_mailbox=mailbox_packet_to_stats, + target=show_stats, + out_mailbox=mailbox_packet_to_mux, + ) + stage_mux = PipelineStage( + name="stream_mux", + in_mailbox=mailbox_packet_to_mux, + target=partial(mux, avmux), + ) + + stage_mux.start() + stage_show_stats.start() + stage_video_process.start() + stage_video_encode.start() + stage_video_capture.start() + + LOGGER.debug("Thread IDs:") + LOGGER.debug(" Capture: %s", stage_video_capture.native_id) + LOGGER.debug(" Preprocess: %s", stage_video_process.native_id) + LOGGER.debug(" Encode: %s", stage_video_encode.native_id) + LOGGER.debug(" Mux: %s", stage_mux.native_id) + + print("Starting video capture. Press Ctrl-C to stop.") + + stage_video_capture.join() + stage_video_process.join() + stage_video_encode.join() + stage_show_stats.join() + stage_mux.join() + + if codec != "libx264" and video_stream.reformatter is not None: + LOGGER.warning("Software encoder is in a hardware encoding " + "path; this may slow things down") if __name__ == "__main__": From fba9bdabf885403a603415cfa07866502b481e30 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Thu, 15 Jan 2026 23:16:16 -0800 Subject: [PATCH 03/16] Add more docs to the video capture demo Very much incomplete, sometimes stopping mid-sentence. But I've written enough that I don't want to lose it, so here's an intermediate commit. --- demos/video-capture.py | 425 +++++++++++++++++++++++++++++++++++------ 1 file changed, 371 insertions(+), 54 deletions(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index b048ee9..ed43db7 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -1,8 +1,256 @@ #! /usr/bin/env python3 -# In one test, here's some numbers this program could achieve. This -# is just meant as a rough guide; your results will almost certainly -# vary significantly. +# This demo isn't meant to be a comprehensive explanation of video +# encoding. There are, however, some concepts that are unavoidable +# when converting from a sequence of snapshots to a video file. We'll +# go over some of those here. +# +# The descriptions given here are simplified. It doesn't go into the +# more obscure details, like H.264 switching frames or the AAC priming +# delay. Nevertheless, this should be enough to get the concepts +# you'll need to understand and build on this demo. 
+# +# +# libav +# ----- +# +# If you care enough about video files to be reading this, you've +# probably used ffmpeg. This is a Swiss Army Knife of video file +# manipulation. +# +# The ffmpeg tool is based on several libraries, which are part of +# ffmpeg, and widely used elsewhere: +# +# - libavcodec: Encoding/decoding library +# - libavfilter: Graph-based frame editing library +# - libavformat: I/O and muxing/demuxing library +# - libavdevice: Special devices muxing/demuxing library +# - libavutil: Common utility library +# - libswresample: Audio resampling, format conversion and mixing +# - libswscale: Color conversion and scaling library +# +# In this demo, I just refer to these collectively as "libav". Think +# of these as the library version of ffmpeg. We mostly use libavcodec +# and libavformat, but that detail isn't something we see in Python: +# all these libraries are essentially one giant bundle as far as we +# care. +# +# The libav libaries are in C. We use the PyAV library. This is not +# simply bindings or a direct translation of the libav C API to +# Python, but rather, a library that's based on libav, but meant to be +# more Pythonic. +# +# [note: it's important to include the fact that pyav.org has outdated +# docs, since they show up prominently in Google searches. The link +# to the GitHub issue is just to tell people that the ] +# +# The docs for PyAV are at . +# The older docs at pyav.org are outdated; see +# . +# +# There was briefly a fork called basewood-av, but it has since been +# discontinued and merged back into PyAV; see +# . Despite the +# domain name, pyav.basswood-io.com hosts the current official PyAV +# documentation, not fork-specific docs. +# +# The PyAV developers are separate from ffmpeg, and there is a bit of +# a difference in the approaches that PyAV takes. See also +# https://pyav.basswood-io.com/docs/stable/overview/caveats.html +# +# +# Container Files +# --------------- +# +# A single file, like kittycat.mp4, is called a "container" in media +# file terms. This is a collection of "streams" (sometimes called +# "elementary streams"), all woven together. +# +# It might contain just a video stream (like we do here), just an +# audio stream (like in a .m4a file, which is just a renamed .mp4 +# file), or both (most common). It might also contain several of +# each; for instance, different languages will usually be in separate +# audio streams. There are other stream types, like subtitles, as +# well. +# +# Weaving these streams together is called "multiplexing", or "muxing" +# for short. Each stream's data gets bundled into "chunks" that are +# typically called "packets". The container keeps packets from the +# same time, but different streams, close to each other in the file. +# +# (By the way, the term "packet" is a holdover from MPEG-2 and before. +# Technically, MP4 files don't have packets: they have chunks, which +# can hold AAC frames, H.264 NALs, etc. The data that used to be in +# MPEG-2 packet headers is now in MP4 tables. To keep the terminology +# consistent between codecs and container formats, libav refers to the +# objects encapsulating all this as packets, regardless of the codec +# or container format.) +# +# For instance, at the beginning of the file, you might have one audio +# packet covering the first 21 ms, then a subtitle packet covering the +# first several seconds, then seven video packets each covering 3 ms, +# followed by another audio packet for the next 21 ms, and so on. 
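+#
+# You can see this interleaving for yourself by demuxing an existing
+# file with PyAV.  This is a rough sketch; kittycat.mp4 is just the
+# placeholder name from above:
+#
+#     import av
+#
+#     with av.open("kittycat.mp4") as container:
+#         for stream in container.streams:
+#             print(stream.index, stream.type, stream.codec_context.name)
+#         for packet in container.demux():
+#             print(packet.stream.type, packet.dts, packet.size)
+#
+# The packets printed by the second loop come out roughly in file
+# order, so you'll see audio and video packets woven together.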
+# +# +# Video Codecs +# ------------ +# +# Within an MP4 file, the video can be stored in a lot of different +# formats. These are the most common: +# +# - MPEG-2: used by DVDs, not much else anymore. +# - MPEG-4 Part 2: also known as DivX. Very popular in the early 2000s, +# not seen much anymore except older archives. +# - H.264: commonly used by BluRay, many streaming services, and many +# MP4 files in the wild. +# - H.265: increasingly used, but not supported by older hardware. +# - AV1 and VP9: used by some streaming services; hardware support +# varies, so these are typically offered alongside H.264 (or H.265) +# as fallbacks. +# +# These are all stream formats. There are many libraries that can +# create these files. These libraries are known as "codecs". In +# some contexts, the word "codec" is also used to name the stream +# format itself, so "H.264" might sometimes be called a codec. +# +# In this demo, we use H.264, since it's the most common. You can +# also specify other codecs. +# +# In ffmpeg, and the av libraries that we use here, the best codec for +# H.264 that doesn't require any specific hardware is libx264. There +# are also faster ones that are hardware-accelerated, such as +# h264_nvenc which uses specialized chips on Nvidia video cards. +# +# +# Frame Types +# ----------- +# +# Reference: https://en.wikipedia.org/wiki/Video_compression_picture_types +# +# [Note: We can probably just give a brief description of the frame +# types.] +# +# The reason that video files can compress so well, much better than +# storing a JPEG for each frame, is that the file often can describe +# just the motion. In a video of a cat meowing, the first frame will +# have everything that's visible: the room in the background, the +# entire cat, the whole thing. We call a video frame that stores the +# whole picture an "I-frame". +# +# But the second frame just has to talk about what's changed in that +# 1/30 sec: it can just say that the tail moved this much to the left, +# the eyes closed slightly, what the now-visible bits of the eyelids +# look like, what's changed about the ear when it moved, etc. We call +# this sort of frame, one that just stores the differences from a +# previous frame, a "P-frame". +# +# We still want to refresh the whole picture from scratch from time to +# time. Since the differences between video frames are compressed, +# they're also imperfect. Over time, these imperfections can +# accumulate. Also, sometimes a frame may have been lost between when +# we store it and when the viewer sees it, such as if we made a DVD +# that later got scratched; we want to let the viewer recover from +# such a situation. To keep things clean, we sometimes send out a new +# I-frame, redrawing the whole picture anew. This normally happens +# about every 0.5 to 2 seconds, depending on the program's purpose. +# The group of pictures starting with a fresh I-frame is, +# straightforwardly enough, called a "group of pictures" (GOP). +# +# Sometimes, it's useful for a frame to give motion based not just on +# the past, but also the future. For instance, when the cat's mouth +# first starts to open, you might want to say "look ahead at how the +# inside of the mouth looks when it's totally open, and draw just this +# tiny sliver of it now." These are called "B-frames". +# +# A GOP usually arranges these frame types in a cadence, like +# IBBPBBPBB.... The specifics are up to the encoder, but the user can +# normally configure it to some degree. 
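+#
+# If you want to see the cadence the encoder actually chose, you can
+# decode the finished capture and print each frame's picture type.
+# This is a small sketch, assuming the demo's default output name:
+#
+#     import av
+#
+#     with av.open("capture.mp4") as container:
+#         for frame in container.decode(video=0):
+#             print(frame.pts, frame.pict_type)  # I, P, or B
+#
+# The encoder options "g" (GOP length, in frames) and "bf" (maximum
+# consecutive B-frames) let you steer this cadence.  They aren't set
+# in CODEC_OPTIONS below, so the encoder's own defaults apply.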
+# +# +# Timestamps +# ---------- +# +# [note: Managing the PTS is a big part of the code, so I want to +# describe it. The DTS is also worth at least highlighting, as is the +# fact that packets from the encoder may be in a different order than +# presentation order.] +# +# In a video file, time is very important. It's used to synchronize +# audio and video, to prevent frame timing quantization from causing +# the clock to drift, and many other purposes. +# +# The time at which each frame should be shown is called its +# "presentation time stamp", or "PTS". Normally, the PTS of the first +# frame is 0, and the rest of the video file is based on that. +# +# Because B-frames can require future frames to interpret, the future +# frames they depend on have to be decoded first. That means that the +# order in which frames are decoded can be different from the order in +# which they are presented. This leads to a second timestamp on each +# frame: the "decoding time stamp", or "DTS". +# +# Different container formats store the timestamps in different +# places: the container's structures, the packet headers, the streams, +# etc. Because of this, there are multiple places that carry +# timestamps. You can just set the timestamp on the video frame, and +# libav will propagate it from there to the packets and so forth. +# +# +# Time Base +# --------- +# +# [note: Most people new to video encoding may assume that timestamps +# are in float or integer nanoseconds or something, so the concept of +# the time base is significant. We also attach it to multiple +# objects: the container object, the video stream context object, and +# each frame. So, the reason we do that is worth noting. Preserve the +# link to the PyAV docs.] +# +# In most video file formats, the time isn't specified in predefined +# units like nanoseconds. Instead, in your video file, you specify +# the time units you're using, a fraction of a second. This is called +# your time base. +# +# There are a lot of different places in a video encoding pipeline +# where you set a time base: everywhere that might need to encode a +# timestamp. They don't necessarily have to be the same (PyAV will +# convert between the different time bases as needed), so the time +# base has to be set on several objects. See also +# +# +# In this demo, we use a common time base of 1/90000 sec everywhere. +# This is a common standard, from the MPEG world. It became a +# standard because it can exactly represent 24 fps (film), 25 fps +# (European TV), 30 fps (US TV, nominally), and 30000/1001 fps (about +# 29.97, US broadcast TV). +# +# +# Performance +# ----------- +# +# This demo uses multiple threads to improve performance. These +# threads are pipelined; see the comments at the start of +# common/pipeline.py for information about that concept. +# +# In a pipelined design, the slowest stage usually sets the overall +# rate. Suppose you and your roommates are all doing the dishes: +# Alice collects dishes and scrapes off food, Bob washes the dishes, +# Carol rinses them, Dave dries them, and Evelyn puts them away. +# If the +# +# [note: A detailed description of pipelining threads is in +# common/pipeline.py. This section should discuss the stages +# we're using, and note that the encoding stage is usually the +# bottleneck.] +# +# +# +# [note: Not sure where to integrate this, but make sure the numbers +# are somewhere.] +# +# In one test, here's some numbers this program could achieve, on an +# idle system. 
This is just meant as a rough guide; your results will +# almost certainly vary significantly. # - libx264, 1920x1080: 80 fps # - libx264, 3840x2160: 18 fps # - h264_nvenc, 1920x1080: 190 fps @@ -22,10 +270,11 @@ import av import numpy as np -from common.pipeline import Mailbox, PipelineStage from si_prefix import si_format import mss +from common.pipeline import Mailbox, PipelineStage + # These are the options you'd give to ffmpeg that would affect the # video codec. @@ -33,13 +282,11 @@ # The "high" profile means that the encoder can use some H.264 # features that are widely supported, but not mandatory. "profile": "high", - # The "medium" preset is as good of a preset as any for a demo # like this. Different codecs have different presets; the the # h264_nvenc actually prefers "p4", but accepts "medium" as a # similar preset. "preset": "medium", - # 6 Mbit/sec is vaguely the ballpark for a good-quality video at # 1080p and 30 fps, but there's a lot of variation. We're just # giving the target bitrate: the second-to-second bitrate will @@ -47,36 +294,46 @@ # this on a nearly-still screen, though, then the actual bitrate # will be much lower, since there's not much motion to encode! "b": "6M", - # Let the encoder hold some frames for analysis, and flush them # later. This especially helps with the hardware-accelerated # codecs. "rc-lookahead": "40", } -# There are a lot of different places in a video encoding pipeline -# where time_base matters, and they don't necessarily have to be the -# same, so the time base has to be set on several objects. In this -# program, we do use a common time base of 1/90000 seconds everywhere. -# This is a common standard, from the MPEG world. + TIME_BASE = Fraction(1, 90000) LOGGER = logging.getLogger("video-capture") + def video_capture( fps: int, sct: mss.base.MSSBase, monitor: mss.models.Monitor, shutdown_requested: Event, ) -> Generator[tuple[mss.screenshot.ScreenShot, float], None, None]: + # Keep track of the time when we want to get the next frame. We + # limit the frame time this way instead of sleeping 1/30 sec each + # frame, since we want to also account for the time taken to get + # the screenshot and other overhead. + # + # Repeatedly adding small floating-point numbers to a total does + # cause some numeric inaccuracies, but it's small enough for our + # purposes. The program would have to run for three months to + # accumulate one millisecond of inaccuracy. next_frame_at = time.monotonic() - capture_period = 1 / fps + + # Keep running this loop until the main thread says we should + # stop. while not shutdown_requested.is_set(): - # Wait until we're ready. + + # Wait until we're ready. This should, ideally, happen every + # 1/30 second. while (now := time.monotonic()) < next_frame_at: + # time.sleep(next_frame_at - now) - # Capture and yield a frame. + # Capture a frame, and send it to the next processing stage. screenshot = sct.grab(monitor) yield screenshot, now @@ -86,9 +343,9 @@ def video_capture( # a bit of "timing debt" in next_frame_at: it'll be a little # sooner than now + one frame. We'll hopefully be able to # catch up soon. - next_frame_at = next_frame_at + capture_period + next_frame_at = next_frame_at + (1 / fps) - # If we've accumulated over one frame's worth of catch-up, + # If we've accumulated over one frame's worth of timing debt, # then that will say that next_frame_at is sooner than now. 
# If we're accumulating too much debt, we want to wipe it out, # rather than having a huge burst of closely-spaced captures @@ -100,7 +357,7 @@ def video_capture( # desired capture period. if next_frame_at < now: missed_frames = floor((now - next_frame_at) * fps) - next_frame_at += (missed_frames + 1) * capture_period + next_frame_at += (missed_frames + 1) / fps def video_process( @@ -108,14 +365,43 @@ def video_process( tuple[mss.screenshot.ScreenShot, float] ], ) -> Generator[av.VideoFrame, None, None]: + # We track when the first first_frame_at: float | None = None + for screenshot, timestamp in screenshot_and_timestamp: + # A screenshot's pixel data can take a long time to copy. + # Just for the CPU to copy the bytes, on my hardware, takes + # about 3ms for a 4k screenshot. This means we want to be + # very careful about how we want to get the data from the + # ScreenShot object to the VideoFrame. + # + # In Python, there's a concept called a "buffer". This is a + # range of memory that can be shared between objects, so the + # objects don't have to copy the data. This is very common in + # libraries like NumPy that work with very large datasets, and + # interpret that data in different ways. + # + # The most common buffers are in extensions written in C, but + # Python objects of type memoryview, bytes, bytearray, and + # array.array are all buffers. The screenshot.bgra attribute + # is also a buffer. (Currently, it's a bytes object, but this + # may change in the future.) + # + # PyAV doesn't let you create a VideoFrame object directly + # from pixel data in a buffer. (It is possible to update the + # data in a VideoFrame to point to a different buffer, but + # that still allocates the memory first.) + # + # However, while it's not documented, PyAV does have the + # from_numpy_buffer method (separately from the from_ndarray + # method). This creates a VideoFrame that shares memory with + # a NumPy array. We tell NumPy to create a new ndarray that + # shares the screenshot's buffer, and create a VideoFrame that + # uses that buffer. ndarray = np.frombuffer(screenshot.bgra, dtype=np.uint8) ndarray = ndarray.reshape(screenshot.height, screenshot.width, 4) - # from_numpy_buffer isn't documented. from_ndarray is, but - # that copies the data. That's slow enough to slow things - # down to the point of being a real bottleneck! frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") + # Set the PTS and time base for the frame. if first_frame_at is None: first_frame_at = timestamp frame.pts = int((timestamp - first_frame_at) / TIME_BASE) @@ -133,7 +419,9 @@ def video_encode( yield video_stream.encode(None) -def show_stats(packet_batches: Iterable[Sequence[av.Packet]]) -> Iterable[Sequence[av.Packet]]: +def show_stats( + packet_batches: Iterable[Sequence[av.Packet]], +) -> Iterable[Sequence[av.Packet]]: """Display streaming statistics (FPS and throughput). Statistics are displayed over a 100-frame sliding window. @@ -157,9 +445,9 @@ def show_stats(packet_batches: Iterable[Sequence[av.Packet]]) -> Iterable[Sequen for packet in packet_batch: # The PTS would make more sense for logging FPS than the # DTS, but because of frame reordering, it makes the stats - # unstable. Using DTS consistently makes the timing quite - # stable, and over the 100-frame window, still quite - # precise. + # a little bit unstable. Using DTS consistently makes the + # timing quite stable, and over the 100-frame window, + # still quite precise. 
time_deque.append(packet.dts) bit_count = packet.size * 8 bit_count_deque.append(bit_count) @@ -177,9 +465,11 @@ def show_stats(packet_batches: Iterable[Sequence[av.Packet]]) -> Iterable[Sequen window_bits = sum(bit_count_deque) - bit_count_deque[-1] fps = window_frames / window_secs bits_per_sec = int(window_bits / window_secs) - line = (f"{running_minutes:02d}:{running_seconds:02d} " - f"frame {frame_count}: {fps:.2f} fps, " - f"{si_format(bits_per_sec, precision=2)}bps") + line = ( + f"{running_minutes:02d}:{running_seconds:02d} " + f"frame {frame_count}: {fps:.2f} fps, " + f"{si_format(bits_per_sec, precision=2)}bps" + ) this_status_len = len(line) full_line = f"\r{line}{' ' * (last_status_len - this_status_len)}" print(full_line, end="") @@ -191,7 +481,10 @@ def show_stats(packet_batches: Iterable[Sequence[av.Packet]]) -> Iterable[Sequen print(f"\r{' ' * last_status_len}\r", end="") -def mux(avmux: av.container.OutputContainer, packet_batches: Iterable[Sequence[av.Packet]]) -> None: +def mux( + avmux: av.container.OutputContainer, + packet_batches: Iterable[Sequence[av.Packet]], +) -> None: for packet_batch in packet_batches: avmux.mux(packet_batch) @@ -219,39 +512,42 @@ def main() -> None: description="Capture screen video to MP4 file" ) parser.add_argument( - "--fps", - type=int, - default=30, - help="frames per second (default: 30)" + "-f", "--fps", type=int, default=30, help="frames per second (default: 30)" ) monitor_group = parser.add_mutually_exclusive_group() monitor_group.add_argument( - "--monitor", + "-m", "--monitor", type=int, default=1, - help="monitor ID to capture (default: 1)" + help="monitor ID to capture (default: 1)", ) monitor_group.add_argument( - "--region", + "-r", "--region", type=parse_region, metavar="LEFT,TOP,RIGHT,BOTTOM", - help="region to capture as comma-separated coordinates" + help="region to capture as comma-separated coordinates", ) parser.add_argument( - "--codec", + "-c", "--codec", default="libx264", - help="video codec (default: libx264; try h264_nvenc for Nvidia hardware encoding)" + help="video codec (default: libx264; try h264_nvenc for Nvidia hardware encoding)", ) parser.add_argument( - "--output", + "-d", "--duration-secs", + type=float, + help="Duration to record (default: no limit)", + ) + parser.add_argument( + "-o", "--output", default="capture.mp4", - help="output filename (default: capture.mp4)" + help="output filename (default: capture.mp4)", ) args = parser.parse_args() fps = args.fps codec = args.codec filename = args.output + duration_secs = args.duration_secs with mss.mss() as sct: if args.region: @@ -292,10 +588,12 @@ def main() -> None: # Otherwise, there will be a software, CPU-side conversion # step when we send it our BGRx frames. We're actually # probably sending it frames in BGR0, not BGRA, but PyAV - # doesn't support reading frames in BGR0, only BGRA. - # H.264 doesn't support an alpha channel anyway, so we can - # just send it BGR0 frames and tell it they're BGRA. - if any(f.name == "bgra" for f in video_stream.codec.video_formats): + # doesn't claim to support reading frames in BGR0, only + # BGRA. H.264 doesn't support an alpha channel anyway, so + # we can just send it BGR0 frames and tell it they're BGRA. + if any( + f.name == "bgra" for f in video_stream.codec.video_formats + ): video_stream.pix_fmt = "bgra" # We open (initialize) the codec explicitly here. 
PyAV # will automatically open it the first time we call @@ -305,12 +603,6 @@ def main() -> None: video_stream.open() shutdown_requested = Event() - def sigint_handler(_signum: int, _frame: Any) -> None: - # The status line will typically be visible, so start - # a fresh line for this message. - print("\nShutting down") - shutdown_requested.set() - signal.signal(signal.SIGINT, sigint_handler) mailbox_screenshot: Mailbox[ tuple[mss.screenshot.ScreenShot, float] @@ -360,7 +652,7 @@ def sigint_handler(_signum: int, _frame: Any) -> None: stage_video_encode.start() stage_video_capture.start() - LOGGER.debug("Thread IDs:") + LOGGER.debug("Native thread IDs:") LOGGER.debug(" Capture: %s", stage_video_capture.native_id) LOGGER.debug(" Preprocess: %s", stage_video_process.native_id) LOGGER.debug(" Encode: %s", stage_video_encode.native_id) @@ -368,6 +660,29 @@ def sigint_handler(_signum: int, _frame: Any) -> None: print("Starting video capture. Press Ctrl-C to stop.") + old_sigint_handler = None + def sigint_handler(_signum: int, _frame: Any) -> None: + # Restore the default behavior, so if our shutdown + # doesn't work because of a bug in our code, the user + # can still press ^C again to terminate the program. + # (The default handler is also in + # signal.default_int_handler, but that's not + # documented.) + signal.signal(signal.SIGINT, old_sigint_handler) + # The status line will typically be visible, so start + # a fresh line for this message. + print("\nShutting down") + shutdown_requested.set() + signal.signal(signal.SIGINT, sigint_handler) + + if duration_secs is not None: + stage_video_capture.join(timeout=duration_secs) + # Either the join timed out, or we processed a ^C and + # requested it exit. Either way, it's safe to set the + # shutdown event again, and return to our normal + # processing loop. + shutdown_requested.set() + stage_video_capture.join() stage_video_process.join() stage_video_encode.join() @@ -375,8 +690,10 @@ def sigint_handler(_signum: int, _frame: Any) -> None: stage_mux.join() if codec != "libx264" and video_stream.reformatter is not None: - LOGGER.warning("Software encoder is in a hardware encoding " - "path; this may slow things down") + LOGGER.warning( + "Software encoder is in a hardware encoding " + "path; this may slow things down" + ) if __name__ == "__main__": From e788690a5feacbbf8d1c5490c34a845ab5a20858 Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Fri, 16 Jan 2026 09:55:36 +0000 Subject: [PATCH 04/16] Improve comments --- demos/video-capture.py | 495 ++++++++++++++++------------------------- 1 file changed, 195 insertions(+), 300 deletions(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index ed43db7..ea28c02 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -1,260 +1,92 @@ #! /usr/bin/env python3 -# This demo isn't meant to be a comprehensive explanation of video -# encoding. There are, however, some concepts that are unavoidable -# when converting from a sequence of snapshots to a video file. We'll -# go over some of those here. +# This demo shows one common use case for MSS: capture the screen and +# write a real video file (MP4) rather than saving individual images. # -# The descriptions given here are simplified. It doesn't go into the -# more obscure details, like H.264 switching frames or the AAC priming -# delay. Nevertheless, this should be enough to get the concepts -# you'll need to understand and build on this demo. +# It’s intentionally not a full “video encoding” course. 
The goal is +# to explain the few concepts that show up throughout the program so +# you can read, tweak, and extend it. # +# What tools are we using? +# ------------------------ # -# libav -# ----- +# Most people first meet video encoding through the `ffmpeg` command. +# Under the hood, ffmpeg is built on the “libav*” C libraries. In this +# demo we use PyAV (`import av`), which is a Pythonic wrapper around +# those libraries. # -# If you care enough about video files to be reading this, you've -# probably used ffmpeg. This is a Swiss Army Knife of video file -# manipulation. -# -# The ffmpeg tool is based on several libraries, which are part of -# ffmpeg, and widely used elsewhere: -# -# - libavcodec: Encoding/decoding library -# - libavfilter: Graph-based frame editing library -# - libavformat: I/O and muxing/demuxing library -# - libavdevice: Special devices muxing/demuxing library -# - libavutil: Common utility library -# - libswresample: Audio resampling, format conversion and mixing -# - libswscale: Color conversion and scaling library -# -# In this demo, I just refer to these collectively as "libav". Think -# of these as the library version of ffmpeg. We mostly use libavcodec -# and libavformat, but that detail isn't something we see in Python: -# all these libraries are essentially one giant bundle as far as we -# care. -# -# The libav libaries are in C. We use the PyAV library. This is not -# simply bindings or a direct translation of the libav C API to -# Python, but rather, a library that's based on libav, but meant to be -# more Pythonic. -# -# [note: it's important to include the fact that pyav.org has outdated -# docs, since they show up prominently in Google searches. The link -# to the GitHub issue is just to tell people that the ] -# -# The docs for PyAV are at . -# The older docs at pyav.org are outdated; see +# PyAV docs: +# Note: the older docs at pyav.org are outdated; see # . +# Caveats: # -# There was briefly a fork called basewood-av, but it has since been -# discontinued and merged back into PyAV; see -# . Despite the -# domain name, pyav.basswood-io.com hosts the current official PyAV -# documentation, not fork-specific docs. -# -# The PyAV developers are separate from ffmpeg, and there is a bit of -# a difference in the approaches that PyAV takes. See also -# https://pyav.basswood-io.com/docs/stable/overview/caveats.html -# -# -# Container Files -# --------------- -# -# A single file, like kittycat.mp4, is called a "container" in media -# file terms. This is a collection of "streams" (sometimes called -# "elementary streams"), all woven together. -# -# It might contain just a video stream (like we do here), just an -# audio stream (like in a .m4a file, which is just a renamed .mp4 -# file), or both (most common). It might also contain several of -# each; for instance, different languages will usually be in separate -# audio streams. There are other stream types, like subtitles, as -# well. -# -# Weaving these streams together is called "multiplexing", or "muxing" -# for short. Each stream's data gets bundled into "chunks" that are -# typically called "packets". The container keeps packets from the -# same time, but different streams, close to each other in the file. -# -# (By the way, the term "packet" is a holdover from MPEG-2 and before. -# Technically, MP4 files don't have packets: they have chunks, which -# can hold AAC frames, H.264 NALs, etc. The data that used to be in -# MPEG-2 packet headers is now in MP4 tables. 
To keep the terminology -# consistent between codecs and container formats, libav refers to the -# objects encapsulating all this as packets, regardless of the codec -# or container format.) -# -# For instance, at the beginning of the file, you might have one audio -# packet covering the first 21 ms, then a subtitle packet covering the -# first several seconds, then seven video packets each covering 3 ms, -# followed by another audio packet for the next 21 ms, and so on. -# +# Containers, streams, and codecs +# ------------------------------- # -# Video Codecs -# ------------ +# A file like `capture.mp4` is a *container*: it holds one or more +# *streams* (usually video and/or audio). This demo writes one video +# stream. # -# Within an MP4 file, the video can be stored in a lot of different -# formats. These are the most common: +# The container interleaves (“muxes”) stream data so players can read +# everything in timestamp order. libav calls those pieces “packets”. +# (In MP4 they’re not literally network-style packets; the term is a +# longstanding libav abstraction.) # -# - MPEG-2: used by DVDs, not much else anymore. -# - MPEG-4 Part 2: also known as DivX. Very popular in the early 2000s, -# not seen much anymore except older archives. -# - H.264: commonly used by BluRay, many streaming services, and many -# MP4 files in the wild. -# - H.265: increasingly used, but not supported by older hardware. -# - AV1 and VP9: used by some streaming services; hardware support -# varies, so these are typically offered alongside H.264 (or H.265) -# as fallbacks. +# A *codec* is the algorithm that compresses/decompresses a stream. +# For MP4 video, common codecs include H.264 and H.265. This demo +# defaults to H.264 via `libx264`, because it’s widely supported. You +# can switch to hardware encoders (e.g. `h264_nvenc`) if available. # -# These are all stream formats. There are many libraries that can -# create these files. These libraries are known as "codecs". In -# some contexts, the word "codec" is also used to name the stream -# format itself, so "H.264" might sometimes be called a codec. +# Frames and frame reordering (I/P/B) +# ---------------------------------- # -# In this demo, we use H.264, since it's the most common. You can -# also specify other codecs. +# Video is encoded as a sequence of frames: +# - I-frames: complete images. +# - P-frames: changes from previous frames. +# - B-frames: changes predicted using both past *and future* frames. # -# In ffmpeg, and the av libraries that we use here, the best codec for -# H.264 that doesn't require any specific hardware is libx264. There -# are also faster ones that are hardware-accelerated, such as -# h264_nvenc which uses specialized chips on Nvidia video cards. +# B-frames are why “the order frames are encoded/decoded” can differ +# from “the order frames are shown”. That leads directly to timestamps. # +# Timestamps (PTS/DTS) +# -------------------- # -# Frame Types -# ----------- +# Every frame has a *presentation timestamp* (PTS): when the viewer +# should see it. # -# Reference: https://en.wikipedia.org/wiki/Video_compression_picture_types +# Encoders may output packets in a different order due to B-frames. +# Those packets also have a *decode timestamp* (DTS): when the decoder +# must decode them so the PTS schedule can be met. # -# [Note: We can probably just give a brief description of the frame -# types.] +# In this demo we set PTS on `VideoFrame`s and let libav/PyAV propagate +# timestamps into the encoded packets. 
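+#
+# A quick way to see the PTS/DTS distinction (a hedged sketch, not
+# part of this demo) is to demux a file that has B-frames and compare
+# the two timestamps on each packet:
+#
+#     import av
+#     with av.open("capture.mp4") as container:
+#         for packet in container.demux(video=0):
+#             print(packet.dts, packet.pts)
+#
+# The DTS column increases monotonically, while the PTS column jumps
+# around wherever frames were reordered.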
# -# The reason that video files can compress so well, much better than -# storing a JPEG for each frame, is that the file often can describe -# just the motion. In a video of a cat meowing, the first frame will -# have everything that's visible: the room in the background, the -# entire cat, the whole thing. We call a video frame that stores the -# whole picture an "I-frame". -# -# But the second frame just has to talk about what's changed in that -# 1/30 sec: it can just say that the tail moved this much to the left, -# the eyes closed slightly, what the now-visible bits of the eyelids -# look like, what's changed about the ear when it moved, etc. We call -# this sort of frame, one that just stores the differences from a -# previous frame, a "P-frame". -# -# We still want to refresh the whole picture from scratch from time to -# time. Since the differences between video frames are compressed, -# they're also imperfect. Over time, these imperfections can -# accumulate. Also, sometimes a frame may have been lost between when -# we store it and when the viewer sees it, such as if we made a DVD -# that later got scratched; we want to let the viewer recover from -# such a situation. To keep things clean, we sometimes send out a new -# I-frame, redrawing the whole picture anew. This normally happens -# about every 0.5 to 2 seconds, depending on the program's purpose. -# The group of pictures starting with a fresh I-frame is, -# straightforwardly enough, called a "group of pictures" (GOP). -# -# Sometimes, it's useful for a frame to give motion based not just on -# the past, but also the future. For instance, when the cat's mouth -# first starts to open, you might want to say "look ahead at how the -# inside of the mouth looks when it's totally open, and draw just this -# tiny sliver of it now." These are called "B-frames". -# -# A GOP usually arranges these frame types in a cadence, like -# IBBPBBPBB.... The specifics are up to the encoder, but the user can -# normally configure it to some degree. -# -# -# Timestamps -# ---------- -# -# [note: Managing the PTS is a big part of the code, so I want to -# describe it. The DTS is also worth at least highlighting, as is the -# fact that packets from the encoder may be in a different order than -# presentation order.] -# -# In a video file, time is very important. It's used to synchronize -# audio and video, to prevent frame timing quantization from causing -# the clock to drift, and many other purposes. -# -# The time at which each frame should be shown is called its -# "presentation time stamp", or "PTS". Normally, the PTS of the first -# frame is 0, and the rest of the video file is based on that. -# -# Because B-frames can require future frames to interpret, the future -# frames they depend on have to be decoded first. That means that the -# order in which frames are decoded can be different from the order in -# which they are presented. This leads to a second timestamp on each -# frame: the "decoding time stamp", or "DTS". -# -# Different container formats store the timestamps in different -# places: the container's structures, the packet headers, the streams, -# etc. Because of this, there are multiple places that carry -# timestamps. You can just set the timestamp on the video frame, and -# libav will propagate it from there to the packets and so forth. 
-# -# -# Time Base +# Time base # --------- # -# [note: Most people new to video encoding may assume that timestamps -# are in float or integer nanoseconds or something, so the concept of -# the time base is significant. We also attach it to multiple -# objects: the container object, the video stream context object, and -# each frame. So, the reason we do that is worth noting. Preserve the -# link to the PyAV docs.] -# -# In most video file formats, the time isn't specified in predefined -# units like nanoseconds. Instead, in your video file, you specify -# the time units you're using, a fraction of a second. This is called -# your time base. +# Timestamps are integers, and their unit is a fraction of a second +# called the *time base*. For example, with a time base of 1/90000, +# a timestamp of 90000 means “1 second”. PyAV will convert between time +# bases when needed, but you must set them consistently where you +# generate timestamps. # -# There are a lot of different places in a video encoding pipeline -# where you set a time base: everywhere that might need to encode a -# timestamp. They don't necessarily have to be the same (PyAV will -# convert between the different time bases as needed), so the time -# base has to be set on several objects. See also -# +# See # -# In this demo, we use a common time base of 1/90000 sec everywhere. -# This is a common standard, from the MPEG world. It became a -# standard because it can exactly represent 24 fps (film), 25 fps -# (European TV), 30 fps (US TV, nominally), and 30000/1001 fps (about -# 29.97, US broadcast TV). +# This demo uses a time base of 1/90000 (a common MPEG-derived choice). # +# Performance (why multiple threads?) +# ---------------------------------- # -# Performance -# ----------- +# Capturing frames, converting them to `VideoFrame`s, encoding, and +# muxing are separate stages. This demo pipelines those stages across +# threads so that (for example) encoding can run while the next screen +# grab is happening. The slowest stage typically limits overall FPS. # -# This demo uses multiple threads to improve performance. These -# threads are pipelined; see the comments at the start of -# common/pipeline.py for information about that concept. -# -# In a pipelined design, the slowest stage usually sets the overall -# rate. Suppose you and your roommates are all doing the dishes: -# Alice collects dishes and scrapes off food, Bob washes the dishes, -# Carol rinses them, Dave dries them, and Evelyn puts them away. -# If the -# -# [note: A detailed description of pipelining threads is in -# common/pipeline.py. This section should discuss the stages -# we're using, and note that the encoding stage is usually the -# bottleneck.] -# -# -# -# [note: Not sure where to integrate this, but make sure the numbers -# are somewhere.] -# -# In one test, here's some numbers this program could achieve, on an -# idle system. This is just meant as a rough guide; your results will -# almost certainly vary significantly. -# - libx264, 1920x1080: 80 fps -# - libx264, 3840x2160: 18 fps -# - h264_nvenc, 1920x1080: 190 fps -# - h264_nvenc, 3840x2160: 41 fps +# On an idle system (rough guide; will vary widely): +# - libx264, 1920x1080: ~80 fps +# - libx264, 3840x2160: ~18 fps +# - h264_nvenc, 1920x1080: ~190 fps +# - h264_nvenc, 3840x2160: ~41 fps import argparse import logging @@ -283,7 +115,7 @@ # features that are widely supported, but not mandatory. "profile": "high", # The "medium" preset is as good of a preset as any for a demo - # like this. 
Different codecs have different presets; the the + # like this. Different codecs have different presets; the # h264_nvenc actually prefers "p4", but accepts "medium" as a # similar preset. "preset": "medium", @@ -313,7 +145,7 @@ def video_capture( shutdown_requested: Event, ) -> Generator[tuple[mss.screenshot.ScreenShot, float], None, None]: # Keep track of the time when we want to get the next frame. We - # limit the frame time this way instead of sleeping 1/30 sec each + # limit the frame time this way instead of sleeping 1/fps sec each # frame, since we want to also account for the time taken to get # the screenshot and other overhead. # @@ -326,11 +158,9 @@ def video_capture( # Keep running this loop until the main thread says we should # stop. while not shutdown_requested.is_set(): - # Wait until we're ready. This should, ideally, happen every - # 1/30 second. + # 1/fps second. while (now := time.monotonic()) < next_frame_at: - # time.sleep(next_frame_at - now) # Capture a frame, and send it to the next processing stage. @@ -365,42 +195,51 @@ def video_process( tuple[mss.screenshot.ScreenShot, float] ], ) -> Generator[av.VideoFrame, None, None]: - # We track when the first + # We track when the first frame happened so we can make PTS start at 0. + # Many video players and other tools expect that. first_frame_at: float | None = None for screenshot, timestamp in screenshot_and_timestamp: - # A screenshot's pixel data can take a long time to copy. - # Just for the CPU to copy the bytes, on my hardware, takes - # about 3ms for a 4k screenshot. This means we want to be - # very careful about how we want to get the data from the - # ScreenShot object to the VideoFrame. + # Avoiding extra pixel copies + # --------------------------- # - # In Python, there's a concept called a "buffer". This is a - # range of memory that can be shared between objects, so the - # objects don't have to copy the data. This is very common in - # libraries like NumPy that work with very large datasets, and - # interpret that data in different ways. + # Copying a full frame of pixels is expensive. On typical + # hardware, a plain CPU memcpy of a 4K BGRA image can cost on + # the order of ~3ms by itself, which is a big chunk of a + # 30fps budget (33ms) and an even bigger chunk of a 60fps + # budget (16.7ms). # - # The most common buffers are in extensions written in C, but - # Python objects of type memoryview, bytes, bytearray, and - # array.array are all buffers. The screenshot.bgra attribute - # is also a buffer. (Currently, it's a bytes object, but this - # may change in the future.) + # So we want to be careful about the *conversion* step from an + # MSS `ScreenShot` to a PyAV `VideoFrame`. Ideally, that step + # should reuse the same underlying bytes rather than creating + # additional intermediate copies. # - # PyAV doesn't let you create a VideoFrame object directly - # from pixel data in a buffer. (It is possible to update the - # data in a VideoFrame to point to a different buffer, but - # that still allocates the memory first.) + # Buffers in Python + # ----------------- # - # However, while it's not documented, PyAV does have the - # from_numpy_buffer method (separately from the from_ndarray - # method). This creates a VideoFrame that shares memory with - # a NumPy array. We tell NumPy to create a new ndarray that - # shares the screenshot's buffer, and create a VideoFrame that - # uses that buffer. + # Many Python objects expose their underlying memory via the + # "buffer protocol". 
A buffer is just a view of raw bytes that + # other libraries can interpret without copying. + # + # Common buffer objects include: `bytes`, `bytearray`, + # `memoryview`, and `array.array`. `screenshot.bgra` is also a + # buffer (currently it is a `bytes` object, though that detail + # may change in the future). + # + # Minimum-copy path: ScreenShot -> NumPy -> VideoFrame + # -------------------------------------------------- + # + # `np.frombuffer()` creates an ndarray *view* of an existing + # buffer (no copy). Reshaping also stays as a view. + # + # PyAV's `VideoFrame.from_ndarray()` always copies the data + # into a new frame-owned buffer. For this demo we use + # the undocumented `VideoFrame.from_numpy_buffer()`, which creates a + # `VideoFrame` that shares memory with the ndarray. ndarray = np.frombuffer(screenshot.bgra, dtype=np.uint8) ndarray = ndarray.reshape(screenshot.height, screenshot.width, 4) frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") + # Set the PTS and time base for the frame. if first_frame_at is None: first_frame_at = timestamp @@ -430,7 +269,8 @@ def show_stats( any individual stage. """ # The start time is only used for showing the clock. The actual - # timing stats all use the times we put in the captured frames. + # timing stats use packet timestamps (ultimately derived from the + # frame PTS we compute during capture). start_time = time.monotonic() time_deque: deque[int] = deque(maxlen=100) bit_count_deque: deque[int] = deque(maxlen=100) @@ -443,11 +283,20 @@ def show_stats( yield packet_batch for packet in packet_batch: - # The PTS would make more sense for logging FPS than the - # DTS, but because of frame reordering, it makes the stats - # a little bit unstable. Using DTS consistently makes the - # timing quite stable, and over the 100-frame window, - # still quite precise. + # FPS from timestamps: why DTS, not PTS? + # + # Intuitively, you'd expect to compute FPS from PTS (the + # time the viewer should *see* each frame). But encoders + # can reorder frames internally (especially with B-frames), + # so packets may come out in a different order than PTS. + # + # If we update a sliding window with out-of-order PTS + # values, the window start/end can "wiggle" even when the + # pipeline is steady, which makes the displayed FPS noisy. + # + # DTS is the time order the decoder must process packets. + # Packets are emitted in DTS order, so using DTS gives a + # stable, monotonic timeline for the sliding window. time_deque.append(packet.dts) bit_count = packet.size * 8 bit_count_deque.append(bit_count) @@ -474,9 +323,11 @@ def show_stats( full_line = f"\r{line}{' ' * (last_status_len - this_status_len)}" print(full_line, end="") last_status_len = this_status_len - # It's difficult to correctly print the fps and bitrate near the - # tail, since we get the last many frames as a big batch. Instead - # of leaving misleading information on the screen, we erase the + # Near shutdown the encoder flush can emit packets in large bursts, + # and we also throttle status updates (to avoid spamming the + # terminal). That combination means the last displayed line may be + # stale or not representative of the final frames. Rather than + # leaving potentially misleading numbers on screen, erase the # status display. 
print(f"\r{' ' * last_status_len}\r", end="") @@ -512,33 +363,42 @@ def main() -> None: description="Capture screen video to MP4 file" ) parser.add_argument( - "-f", "--fps", type=int, default=30, help="frames per second (default: 30)" + "-f", + "--fps", + type=int, + default=30, + help="frames per second (default: 30)", ) monitor_group = parser.add_mutually_exclusive_group() monitor_group.add_argument( - "-m", "--monitor", + "-m", + "--monitor", type=int, default=1, help="monitor ID to capture (default: 1)", ) monitor_group.add_argument( - "-r", "--region", + "-r", + "--region", type=parse_region, metavar="LEFT,TOP,RIGHT,BOTTOM", help="region to capture as comma-separated coordinates", ) parser.add_argument( - "-c", "--codec", + "-c", + "--codec", default="libx264", help="video codec (default: libx264; try h264_nvenc for Nvidia hardware encoding)", ) parser.add_argument( - "-d", "--duration-secs", + "-d", + "--duration-secs", type=float, help="Duration to record (default: no limit)", ) parser.add_argument( - "-o", "--output", + "-o", + "--output", default="capture.mp4", help="output filename (default: capture.mp4)", ) @@ -577,23 +437,32 @@ def main() -> None: ) video_stream.width = monitor["width"] video_stream.height = monitor["height"] - # Setting the time_base on the stream is possible, but - # isn't what we need (for reasons I'm unclear on): we need - # to set it on the codec context. + # There are multiple time bases in play (stream, + # codec context, per-frame). Depending on the container + # and codec, some of these might be ignored or overridden. + # We set the desired time base consistently everywhere, + # so that the saved timestamps are correct regardless of what + # format we're saving to. + video_stream.time_base = TIME_BASE video_stream.codec_context.time_base = TIME_BASE - # Assigning the pix_fmt is telling the video encoder what - # we'll be sending it, not necessarily what it will - # output. If the codec supports BGRx inputs, then that's - # the most efficient way for us to send it our frames. - # Otherwise, there will be a software, CPU-side conversion - # step when we send it our BGRx frames. We're actually - # probably sending it frames in BGR0, not BGRA, but PyAV - # doesn't claim to support reading frames in BGR0, only - # BGRA. H.264 doesn't support an alpha channel anyway, so - # we can just send it BGR0 frames and tell it they're BGRA. - if any( - f.name == "bgra" for f in video_stream.codec.video_formats - ): + # `pix_fmt` here describes the pixel format we will *feed* + # into the encoder (not necessarily what the encoder will + # store in the bitstream). H.264 encoders ultimately + # convert to a YUV format internally. + # + # If the encoder accepts BGRA input (e.g., h264_nvenc), we + # can hand it MSS's BGRA frames directly and avoid an extra + # pre-conversion step on our side. + # + # If the encoder doesn't accept BGRA input (e.g., libx264), + # PyAV will insert a conversion step automatically. In that + # case, we let the codec choose the pix_fmt it's going to + # expect. + # + # Note: the alpha channel is ignored by H.264. We may + # effectively be sending BGRx/BGR0. But PyAV's VideoFrame + # only exposes "bgra" as the closest supported format. + if any(f.name == "bgra" for f in video_stream.codec.video_formats): video_stream.pix_fmt = "bgra" # We open (initialize) the codec explicitly here. 
PyAV # will automatically open it the first time we call @@ -658,9 +527,8 @@ def main() -> None: LOGGER.debug(" Encode: %s", stage_video_encode.native_id) LOGGER.debug(" Mux: %s", stage_mux.native_id) - print("Starting video capture. Press Ctrl-C to stop.") + old_sigint_handler = signal.getsignal(signal.SIGINT) - old_sigint_handler = None def sigint_handler(_signum: int, _frame: Any) -> None: # Restore the default behavior, so if our shutdown # doesn't work because of a bug in our code, the user @@ -673,7 +541,10 @@ def sigint_handler(_signum: int, _frame: Any) -> None: # a fresh line for this message. print("\nShutting down") shutdown_requested.set() - signal.signal(signal.SIGINT, sigint_handler) + + old_sigint_handler = signal.signal(signal.SIGINT, sigint_handler) + + print("Starting video capture. Press Ctrl-C to stop.") if duration_secs is not None: stage_video_capture.join(timeout=duration_secs) @@ -682,17 +553,41 @@ def sigint_handler(_signum: int, _frame: Any) -> None: # shutdown event again, and return to our normal # processing loop. shutdown_requested.set() - + stage_video_capture.join() stage_video_process.join() stage_video_encode.join() stage_show_stats.join() stage_mux.join() - if codec != "libx264" and video_stream.reformatter is not None: + # PyAV may insert an implicit conversion step between the frames we + # provide and what the encoder actually accepts (pixel format, + # colorspace, etc.). When that happens, `video_stream.reformatter` + # gets set. + # + # This is useful to know for performance: those conversions are + # typically CPU-side work and can become a bottleneck. + # Hardware-accelerated encoders, such as `h264_nvenc`, often accept + # BGRx, and can perform the conversion using specialized hardware. + # + # We already know that libx264 doesn't accept RGB input, so + # we don't warn about that. (There is a libx264rgb, but that + # uses a H.264 format that is not widely supported.) + # We just want to warn about other + # codecs, since some of them might have ways to use BGRx input, + # and the programmer might want to investigate. + # + # Note: `reformatter` is created lazily, so it may only be set after + # frames have been sent through the encoder, which is why we check + # it at the end. + if video_stream.reformatter is not None and codec != "libx264": LOGGER.warning( - "Software encoder is in a hardware encoding " - "path; this may slow things down" + "PyAV inserted a CPU-side pixel-format/colorspace conversion " + "step (video_stream.reformatter is set) while encoding with %s; " + "this can reduce FPS. Check the acceptable pix_fmts for this codec, " + "and see if one of them can accept some variation of BGRx input " + "directly.", + codec, ) From eed9245badec16caf170c4d05c874b49c8a2b4dc Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Fri, 16 Jan 2026 10:02:09 +0000 Subject: [PATCH 05/16] Add a comment about color spaces --- demos/video-capture.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/demos/video-capture.py b/demos/video-capture.py index ea28c02..836037d 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -435,6 +435,13 @@ def main() -> None: video_stream = avmux.add_stream( codec, rate=fps, options=CODEC_OPTIONS ) + # Ideally, we would set attributes such as colorspace, + # color_range, color_primaries, and color_trc here to + # describe the colorspace accurately. This would be + # significant if we're capturing on a Display P3 Mac, while + # the video file is on an sRGB Windows machine. 
Currently, + # MSS doesn't give us that information, so we skip it for + # now. video_stream.width = monitor["width"] video_stream.height = monitor["height"] # There are multiple time bases in play (stream, From 03da28dce335fc452037c28296ddfe65bab04561 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Fri, 16 Jan 2026 17:29:15 -0800 Subject: [PATCH 06/16] Add notes about colorspace tagging Also reformat the comments. --- demos/video-capture.py | 194 ++++++++++++++++++++++++----------------- 1 file changed, 116 insertions(+), 78 deletions(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index 836037d..3ffd1e8 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -3,7 +3,7 @@ # This demo shows one common use case for MSS: capture the screen and # write a real video file (MP4) rather than saving individual images. # -# It’s intentionally not a full “video encoding” course. The goal is +# It's intentionally not a full "video encoding" course. The goal is # to explain the few concepts that show up throughout the program so # you can read, tweak, and extend it. # @@ -11,9 +11,9 @@ # ------------------------ # # Most people first meet video encoding through the `ffmpeg` command. -# Under the hood, ffmpeg is built on the “libav*” C libraries. In this -# demo we use PyAV (`import av`), which is a Pythonic wrapper around -# those libraries. +# Under the hood, ffmpeg is built on the "libav*" C libraries. In +# this demo we use PyAV (`import av`), which is a Pythonic wrapper +# around those libraries. # # PyAV docs: # Note: the older docs at pyav.org are outdated; see @@ -24,17 +24,17 @@ # ------------------------------- # # A file like `capture.mp4` is a *container*: it holds one or more -# *streams* (usually video and/or audio). This demo writes one video +# *streams* (usually video and/or audio). This demo writes one video # stream. # -# The container interleaves (“muxes”) stream data so players can read -# everything in timestamp order. libav calls those pieces “packets”. -# (In MP4 they’re not literally network-style packets; the term is a +# The container interleaves ("muxes") stream data so players can read +# everything in timestamp order. libav calls those pieces "packets". +# (In MP4 they're not literally network-style packets; the term is a # longstanding libav abstraction.) # # A *codec* is the algorithm that compresses/decompresses a stream. -# For MP4 video, common codecs include H.264 and H.265. This demo -# defaults to H.264 via `libx264`, because it’s widely supported. You +# For MP4 video, common codecs include H.264 and H.265. This demo +# defaults to H.264 via `libx264`, because it's widely supported. You # can switch to hardware encoders (e.g. `h264_nvenc`) if available. # # Frames and frame reordering (I/P/B) @@ -45,8 +45,9 @@ # - P-frames: changes from previous frames. # - B-frames: changes predicted using both past *and future* frames. # -# B-frames are why “the order frames are encoded/decoded” can differ -# from “the order frames are shown”. That leads directly to timestamps. +# B-frames are why "the order frames are encoded/decoded" can differ +# from "the order frames are shown". That leads directly to +# timestamps. # # Timestamps (PTS/DTS) # -------------------- @@ -58,15 +59,15 @@ # Those packets also have a *decode timestamp* (DTS): when the decoder # must decode them so the PTS schedule can be met. # -# In this demo we set PTS on `VideoFrame`s and let libav/PyAV propagate -# timestamps into the encoded packets. 
+# In this demo we set PTS on `VideoFrame`s and let libav/PyAV +# propagate timestamps into the encoded packets. # # Time base # --------- # # Timestamps are integers, and their unit is a fraction of a second -# called the *time base*. For example, with a time base of 1/90000, -# a timestamp of 90000 means “1 second”. PyAV will convert between time +# called the *time base*. For example, with a time base of 1/90000, a +# timestamp of 90000 means "1 second". PyAV will convert between time # bases when needed, but you must set them consistently where you # generate timestamps. # @@ -78,9 +79,9 @@ # ---------------------------------- # # Capturing frames, converting them to `VideoFrame`s, encoding, and -# muxing are separate stages. This demo pipelines those stages across +# muxing are separate stages. This demo pipelines those stages across # threads so that (for example) encoding can run while the next screen -# grab is happening. The slowest stage typically limits overall FPS. +# grab is happening. The slowest stage typically limits overall FPS. # # On an idle system (rough guide; will vary widely): # - libx264, 1920x1080: ~80 fps @@ -135,6 +136,10 @@ TIME_BASE = Fraction(1, 90000) +# Currently, MSS doesn't give us information about the display's +# colorspace. See where this is used below for more information. +DISPLAY_IS_SRGB = False + LOGGER = logging.getLogger("video-capture") @@ -195,8 +200,8 @@ def video_process( tuple[mss.screenshot.ScreenShot, float] ], ) -> Generator[av.VideoFrame, None, None]: - # We track when the first frame happened so we can make PTS start at 0. - # Many video players and other tools expect that. + # We track when the first frame happened so we can make PTS start + # at 0. Many video players and other tools expect that. first_frame_at: float | None = None for screenshot, timestamp in screenshot_and_timestamp: @@ -205,9 +210,9 @@ def video_process( # # Copying a full frame of pixels is expensive. On typical # hardware, a plain CPU memcpy of a 4K BGRA image can cost on - # the order of ~3ms by itself, which is a big chunk of a - # 30fps budget (33ms) and an even bigger chunk of a 60fps - # budget (16.7ms). + # the order of ~3ms by itself, which is a big chunk of a 30fps + # budget (33ms) and an even bigger chunk of a 60fps budget + # (16.7ms). # # So we want to be careful about the *conversion* step from an # MSS `ScreenShot` to a PyAV `VideoFrame`. Ideally, that step @@ -218,24 +223,24 @@ def video_process( # ----------------- # # Many Python objects expose their underlying memory via the - # "buffer protocol". A buffer is just a view of raw bytes that - # other libraries can interpret without copying. + # "buffer protocol". A buffer is just a view of raw bytes + # that other libraries can interpret without copying. # # Common buffer objects include: `bytes`, `bytearray`, - # `memoryview`, and `array.array`. `screenshot.bgra` is also a - # buffer (currently it is a `bytes` object, though that detail - # may change in the future). + # `memoryview`, and `array.array`. `screenshot.bgra` is also + # a buffer (currently it is a `bytes` object, though that + # detail may change in the future). # # Minimum-copy path: ScreenShot -> NumPy -> VideoFrame - # -------------------------------------------------- + # ---------------------------------------------------- # # `np.frombuffer()` creates an ndarray *view* of an existing # buffer (no copy). Reshaping also stays as a view. 
# # PyAV's `VideoFrame.from_ndarray()` always copies the data - # into a new frame-owned buffer. For this demo we use - # the undocumented `VideoFrame.from_numpy_buffer()`, which creates a - # `VideoFrame` that shares memory with the ndarray. + # into a new frame-owned buffer. For this demo we use the + # undocumented `VideoFrame.from_numpy_buffer()`, which creates + # a `VideoFrame` that shares memory with the ndarray. ndarray = np.frombuffer(screenshot.bgra, dtype=np.uint8) ndarray = ndarray.reshape(screenshot.height, screenshot.width, 4) frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") @@ -245,6 +250,14 @@ def video_process( first_frame_at = timestamp frame.pts = int((timestamp - first_frame_at) / TIME_BASE) frame.time_base = TIME_BASE + + # If we know the colorspace of our frames, mark them + # accordingly. See the comment where we set these attributes + # on video_stream for details. + if DISPLAY_IS_SRGB: + frame.colorspace = av.video.reformatter.Colorspace.ITU709 + frame.color_range = av.video.reformatter.ColorRange.JPEG + yield frame @@ -287,8 +300,9 @@ def show_stats( # # Intuitively, you'd expect to compute FPS from PTS (the # time the viewer should *see* each frame). But encoders - # can reorder frames internally (especially with B-frames), - # so packets may come out in a different order than PTS. + # can reorder frames internally (especially with + # B-frames), so packets may come out in a different order + # than PTS. # # If we update a sliding window with out-of-order PTS # values, the window start/end can "wiggle" even when the @@ -323,11 +337,11 @@ def show_stats( full_line = f"\r{line}{' ' * (last_status_len - this_status_len)}" print(full_line, end="") last_status_len = this_status_len - # Near shutdown the encoder flush can emit packets in large bursts, - # and we also throttle status updates (to avoid spamming the - # terminal). That combination means the last displayed line may be - # stale or not representative of the final frames. Rather than - # leaving potentially misleading numbers on screen, erase the + # Near shutdown the encoder flush can emit packets in large + # bursts, and we also throttle status updates (to avoid spamming + # the terminal). That combination means the last displayed line + # may be stale or not representative of the final frames. Rather + # than leaving potentially misleading numbers on screen, erase the # status display. print(f"\r{' ' * last_status_len}\r", end="") @@ -435,20 +449,44 @@ def main() -> None: video_stream = avmux.add_stream( codec, rate=fps, options=CODEC_OPTIONS ) + # Ideally, we would set attributes such as colorspace, # color_range, color_primaries, and color_trc here to - # describe the colorspace accurately. This would be - # significant if we're capturing on a Display P3 Mac, while - # the video file is on an sRGB Windows machine. Currently, - # MSS doesn't give us that information, so we skip it for - # now. + # describe the colorspace accurately. Otherwise, the + # player has to guess whether this was recorded on an sRGB + # Windows machine, a Display P3 Mac, or if it's using + # linear RGB. Currently, MSS doesn't give us colorspace + # information (DISPLAY_IS_SRGB is always False in this + # demo), so we don't try to specify a particular + # colorspace. However, if your application knows the + # colorspace you're recording from, then you can set those + # attributes on the stream and the frames accordingly. 
+ # + # These properties on the stream (actually, they're + # attached to its CodecContext) are used to tell the + # stream and container how to label the video stream's + # colorspace. There are similar attributes on the frame + # itself; those are used to identify its colorspace, so + # the codec can do the correct RGB to YUV conversion. + if DISPLAY_IS_SRGB: + video_stream.color_primaries = 1 # libavutil's AVCOL_PRI_BT709; PyAV doesn't define constants for color primaries. + video_stream.colorspace = av.video.reformatter.Colorspace.ITU709 # More commonly called BT.709 + # The "JPEG" color range is saying that we're using a + # color range like a computer, not like broadcast TV. + video_stream.color_range = av.video.reformatter.ColorRange.JPEG + # Technically, sRGB's transformation characteristic is + # AVCOL_TRC_IEC61966_2_1. It's nearly the same as + # BT.709's TRC, so some video encoders will tag it as + # AVCOL_TRC_BT709 (1) instead. + video_stream.color_trc = 13 # libavutil's AVCOL_TRC_IEC61966_2_1; PyAV doesn't define constants for TRCs. + video_stream.width = monitor["width"] video_stream.height = monitor["height"] - # There are multiple time bases in play (stream, - # codec context, per-frame). Depending on the container - # and codec, some of these might be ignored or overridden. - # We set the desired time base consistently everywhere, - # so that the saved timestamps are correct regardless of what + # There are multiple time bases in play (stream, codec + # context, per-frame). Depending on the container and + # codec, some of these might be ignored or overridden. We + # set the desired time base consistently everywhere, so + # that the saved timestamps are correct regardless of what # format we're saving to. video_stream.time_base = TIME_BASE video_stream.codec_context.time_base = TIME_BASE @@ -458,13 +496,13 @@ def main() -> None: # convert to a YUV format internally. # # If the encoder accepts BGRA input (e.g., h264_nvenc), we - # can hand it MSS's BGRA frames directly and avoid an extra - # pre-conversion step on our side. + # can hand it MSS's BGRA frames directly and avoid an + # extra pre-conversion step on our side. # - # If the encoder doesn't accept BGRA input (e.g., libx264), - # PyAV will insert a conversion step automatically. In that - # case, we let the codec choose the pix_fmt it's going to - # expect. + # If the encoder doesn't accept BGRA input (e.g., + # libx264), PyAV will insert a conversion step + # automatically. In that case, we let the codec choose + # the pix_fmt it's going to expect. # # Note: the alpha channel is ignored by H.264. We may # effectively be sending BGRx/BGR0. But PyAV's VideoFrame @@ -567,34 +605,34 @@ def sigint_handler(_signum: int, _frame: Any) -> None: stage_show_stats.join() stage_mux.join() - # PyAV may insert an implicit conversion step between the frames we - # provide and what the encoder actually accepts (pixel format, - # colorspace, etc.). When that happens, `video_stream.reformatter` - # gets set. + # PyAV may insert an implicit conversion step between the + # frames we provide and what the encoder actually accepts + # (pixel format, colorspace, etc.). When that happens, + # `video_stream.reformatter` gets set. # - # This is useful to know for performance: those conversions are - # typically CPU-side work and can become a bottleneck. - # Hardware-accelerated encoders, such as `h264_nvenc`, often accept - # BGRx, and can perform the conversion using specialized hardware. 
+ # This is useful to know for performance: those + # conversions are typically CPU-side work and can become a + # bottleneck. Hardware-accelerated encoders, such as + # `h264_nvenc`, often accept BGRx, and can perform the + # conversion using specialized hardware. # - # We already know that libx264 doesn't accept RGB input, so - # we don't warn about that. (There is a libx264rgb, but that - # uses a H.264 format that is not widely supported.) - # We just want to warn about other - # codecs, since some of them might have ways to use BGRx input, - # and the programmer might want to investigate. + # We already know that libx264 doesn't accept RGB input, + # so we don't warn about that. (There is a libx264rgb, + # but that writes to a different H.264 format.) We just + # want to warn about other codecs, since some of them + # might have ways to use BGRx input, and the programmer + # might want to investigate. # - # Note: `reformatter` is created lazily, so it may only be set after - # frames have been sent through the encoder, which is why we check - # it at the end. + # Note: `reformatter` is created lazily, so it may only be + # set after frames have been sent through the encoder, + # which is why we check it at the end. if video_stream.reformatter is not None and codec != "libx264": LOGGER.warning( - "PyAV inserted a CPU-side pixel-format/colorspace conversion " - "step (video_stream.reformatter is set) while encoding with %s; " - "this can reduce FPS. Check the acceptable pix_fmts for this codec, " - "and see if one of them can accept some variation of BGRx input " - "directly.", - codec, + "PyAV inserted a CPU-side pixel-format/colorspace " + "conversion step; this can reduce FPS. Check the " + "acceptable pix_fmts for this codec, and see if one " + "of them can accept some variation of BGRx input " + "directly." ) From 883f365174bf8c5aed1d6d1d193d1493f94de673 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Fri, 16 Jan 2026 17:43:46 -0800 Subject: [PATCH 07/16] Add a pointer to the comments in pipeline.py --- demos/video-capture.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index 3ffd1e8..83ee4eb 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -81,7 +81,11 @@ # Capturing frames, converting them to `VideoFrame`s, encoding, and # muxing are separate stages. This demo pipelines those stages across # threads so that (for example) encoding can run while the next screen -# grab is happening. The slowest stage typically limits overall FPS. +# grab is happening. The comments at the top of common/pipeline.py +# describe pipelining in detail. +# +# The slowest stage typically limits overall FPS. Usually, that's the +# encoder. # # On an idle system (rough guide; will vary widely): # - libx264, 1920x1080: ~80 fps From dfe27840b4371ce0c88844e9df23ae077f8c5f76 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sat, 17 Jan 2026 01:49:59 -0800 Subject: [PATCH 08/16] Add comments and help strings about using other codecs --- demos/video-capture.py | 44 +++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index 83ee4eb..979bebe 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -113,11 +113,17 @@ from common.pipeline import Mailbox, PipelineStage -# These are the options you'd give to ffmpeg that would affect the -# video codec. 
+# These are the options you'd give to ffmpeg that it sends to the +# video codec. The options you can use here can be listed with +# `ffmpeg -help encoder=libx264`, or whatever encoder you're using for +# this demo's `--codec` flag. The options for each encoder are described +# in more detail in `man ffmpeg-codecs`. CODEC_OPTIONS = { # The "high" profile means that the encoder can use some H.264 - # features that are widely supported, but not mandatory. + # features that are widely supported, but not mandatory. If + # you're using a codec other than H.264, you'll need to comment + # out this line: the relevant features are already part of the + # main profile in later codecs like H.265, VP8, VP9, and AV1. "profile": "high", # The "medium" preset is as good of a preset as any for a demo # like this. Different codecs have different presets; the @@ -406,7 +412,12 @@ def main() -> None: "-c", "--codec", default="libx264", - help="video codec (default: libx264; try h264_nvenc for Nvidia hardware encoding)", + help=( + 'video codec implementation, same as the ffmpeg "-c:v" flag. ' + 'Run "python3 -m av --codecs" for a full list. ' + "(default: libx264. Try h264_nvenc for Nvidia " + "hardware encoding.)" + ), ) parser.add_argument( "-d", @@ -439,6 +450,8 @@ def main() -> None: else: monitor = sct.monitors[args.monitor] + # We don't pass the container format to av.open here, so it + # will choose it based on the extension: .mp4, .mkv, etc. with av.open(filename, "w") as avmux: # We could initialize video_stream in video_encode, but # doing it here means that we can open it before starting @@ -473,16 +486,25 @@ def main() -> None: # itself; those are used to identify its colorspace, so # the codec can do the correct RGB to YUV conversion. if DISPLAY_IS_SRGB: - video_stream.color_primaries = 1 # libavutil's AVCOL_PRI_BT709; PyAV doesn't define constants for color primaries. - video_stream.colorspace = av.video.reformatter.Colorspace.ITU709 # More commonly called BT.709 + # color_primaries=1 is libavutil's AVCOL_PRI_BT709; + # PyAV doesn't define named constants for color + # primaries. + video_stream.color_primaries = 1 + # What PyAV refers to as ITU709 is more commonly known + # as BT.709. + video_stream.colorspace = ( + av.video.reformatter.Colorspace.ITU709 + ) # The "JPEG" color range is saying that we're using a # color range like a computer, not like broadcast TV. video_stream.color_range = av.video.reformatter.ColorRange.JPEG - # Technically, sRGB's transformation characteristic is - # AVCOL_TRC_IEC61966_2_1. It's nearly the same as - # BT.709's TRC, so some video encoders will tag it as - # AVCOL_TRC_BT709 (1) instead. - video_stream.color_trc = 13 # libavutil's AVCOL_TRC_IEC61966_2_1; PyAV doesn't define constants for TRCs. + # PyAV doesn't define named constants for TRCs, so we + # pass it a numeric value. Technically, sRGB's + # transformation characteristic is + # AVCOL_TRC_IEC61966_2_1 (13). It's nearly the same + # as BT.709's TRC, so some video encoders will tag it + # as AVCOL_TRC_BT709 (1) instead. 
+ video_stream.color_trc = 13 video_stream.width = monitor["width"] video_stream.height = monitor["height"] From 8705054dfa1d9f8eed760e4d91b0887283dfffff Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sun, 18 Jan 2026 16:59:35 -0800 Subject: [PATCH 09/16] Add a simple version --- demos/video-capture-simple.py | 225 ++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100755 demos/video-capture-simple.py diff --git a/demos/video-capture-simple.py b/demos/video-capture-simple.py new file mode 100755 index 0000000..aa17a8d --- /dev/null +++ b/demos/video-capture-simple.py @@ -0,0 +1,225 @@ +#! /usr/bin/env python3 + +# A lot of people want to use MSS to record a video of the screen. +# Doing it really well can be difficult - there's a reason OBS is such +# a significant program - but the basics are surprisingly easy! +# +# There's a more advanced example, video-capture-stream.py, that has +# more features, and better performance. But this simple demo is +# easier to understand, because it does everything in a +# straightforward way, without any complicated features. +# +# Here, we're going to record the screen for 10 seconds, and save the +# result in capture.mp4, as an H.264 video stream. +# +# Sometimes, in film, cameramen will "undercrank", filming the action +# at a slower frame rate than how it will eventually be projected. In +# that case, motion appears artificially sped up, either for comedy +# (like the Benny Hill TV show), or for fast and frenetic action (like +# Mad Max: Fury Road). +# +# In this demo, we put in the file a marker saying that it's at 30 +# fps. But since this is a simple demo, your computer might not be +# able to keep up with writing video frames at that speed. In that +# case, you'll see the same effect: sped-up motion. +# +# The advanced demo has several techniques to mitigate that. First, +# it uses pipelined threads to let the video encoder use a full CPU +# core (often more, internally), rather than having to share a CPU +# core with all the other tasks. Second, it puts a timestamp marker +# on each frame saying exactly when it's supposed to be shown, rather +# than just saying to show all the frames at 30 fps. +# +# For this simple demo, though, we just record the frames and add them +# to the file one at a time. +# +# We use three libraries that don't come with Python: Pillow, PyAV, +# and (of course) MSS. You'll need to install those with "pip install +# pillow av mss". Normally, you'll want to install these into a venv; +# if you don't know about those, there are lots of great tutorials +# online. + +import logging +import time + +import av +from PIL import Image + +import mss + +# These are the options you'd give to ffmpeg that would affect the way +# the video is encoded. There are comments in the advanced demo that +# go into more detail. +CODEC_OPTIONS = { + "profile": "high", + "preset": "medium", + "b": "6M", + "rc-lookahead": "40", +} + +# We'll try to capture at 30 fps, if the system can keep up with it +# (typically, that's possible at 1080p, but not at 4k). Regardless of +# what the system can keep up with, we'll mark the file as being at 30 +# fps. +FPS = 30 + +# The program will exit after 10 seconds of recording. +CAPTURE_SECONDS = 10 + +# Within an MP4 file, the video can be stored in a lot of different +# formats. In this demo, we use H.264, since it's the most widely +# supported. 
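If you want to try a hardware encoder instead of libx264, you can probe for one and fall back. This is only a sketch: the candidate names are common FFmpeg encoder names, and having one listed doesn't guarantee the matching hardware is present (opening the stream can still fail).

    import av

    def pick_h264_encoder() -> str:
        # Ordered roughly from "fastest if available" to "always there".
        for name in ("h264_nvenc", "h264_qsv", "h264_videotoolbox", "libx264"):
            try:
                av.codec.Codec(name, "w")  # raises if not built into FFmpeg
                return name
            except av.codec.codec.UnknownCodecError:
                continue
        raise RuntimeError("no H.264 encoder available")

    print("Would encode with:", pick_h264_encoder())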
+# +# In ffmpeg, and the av libraries that we use here, the best codec for +# H.264 that doesn't require any specific hardware is libx264. There +# are faster ones that are hardware-accelerated, such as h264_nvenc +# which uses specialized chips on Nvidia video cards. +CODEC = "libx264" + +FILENAME = "capture.mp4" + + +def main() -> None: + logging.basicConfig(level=logging.DEBUG) + # If we don't enable PyAV's own logging, a lot of important error + # messages from libav won't be shown. + av.logging.set_level(av.logging.VERBOSE) + + with mss.mss() as sct: + monitor = sct.monitors[1] + + with av.open(FILENAME, "w") as avmux: + # The "avmux" object we get back from "av.open" represents + # the MP4 file. That's a container that holds the video, + # as well as possibly audio and more. These are each + # called "streams". We only create one stream here, since + # we're just recording video. + video_stream = avmux.add_stream( + CODEC, rate=FPS, options=CODEC_OPTIONS + ) + video_stream.width = monitor["width"] + video_stream.height = monitor["height"] + # There are more options you can set on the video stream; + # the advanced demo uses some of those. + + # Count how many frames we're capturing, so we can log + # the FPS later. + frame_count = 0 + + # Mark the times when we start and end the recording. + capture_start_time = time.monotonic() + capture_end_time = capture_start_time + CAPTURE_SECONDS + + # MSS can capture very fast, and libav can encode very + # fast, depending on your hardware and screen size. We + # don't want to capture faster than 30 fps (or whatever + # you set FPS to). To slow down to our desired rate, we + # keep a variable "next_frame_time" to track when it's + # time to track the next frame. + # + # Some programs will just sleep for 1/30 sec in each loop. + # But by tracking the time when we want to capture the + # next frame, instead of always sleeping for 1/30 sec, the + # time that is spent doing the capture and encode (which + # can be substantial) is counted as part of the total time + # we need to delay. + next_frame_time = capture_start_time + + print("Capturing to", FILENAME, "for", CAPTURE_SECONDS, "seconds") + while True: + # Wait until we reach the time for the next frame. + while (now := time.monotonic()) < next_frame_time: + time.sleep(next_frame_time - now) + + # Try to capture the next frame 1/30 sec after our + # target time for this frame. We update this based on + # the target time instead of the actual time so that, + # if we were a little slow capturing this frame, we'll + # be a little fast capturing the next one, and even + # things out. (There's a slightly better, but more + # complex, way to update next_frame_time in the + # advanced demo.) + next_frame_time = next_frame_time + 1 / FPS + + # See if we've finished the requested capture + # duration. + if now > capture_end_time: + break + + # Print dots for each frame, so you know it's not + # frozen. + print(".", end="", flush=True) + + # Grab a screenshot. + screenshot = sct.grab(monitor) + frame_count += 1 + + # There are a few ways to get the screenshot into a + # VideoFrame. The highest-performance way isn't hard, + # and is shown in the advanced demo: search for + # from_numpy_buffer. But the most obvious way is to + # use PIL: you can create an Image from the + # screenshot, and create a VideoFrame from that. That + # said, if you want to boost the fps rate by about + # 50%, check out the advanced demo, and search for + # from_numpy_buffer. 
+ img = Image.frombytes( + "RGB", screenshot.size, screenshot.bgra, "raw", "BGRX" + ) + frame = av.VideoFrame.from_image(img) + + # When we encode frames, we get back a list of + # packets. Often, we'll get no packets at first: the + # video encoder wants to wait and see the motion + # before it decides how it wants to encode the frames. + # Later, once it's decided about the earlier frames, + # we'll start getting those packets, while it's + # holding on to later frames. + # + # You can imagine that the encoder is a factory. + # You're providing it frames, one at a time, each as a + # box of raw materials. It cranks out packets as its + # finished product. But there's some delay while it's + # working. You can imagine these on a conveyor belt + # moving left to right as time progresses: + # + # FRAMES ENCODER PACKETS + # [1]________-> (Factory) ->____________ + # [3]_[2]_[1]-> (Factory) ->____________ + # [6]_[5]_[4]-> (Factory) ->{1}_________ + # [8]_[7]_[6]-> (Factory) ->{3}_{2}_{1}_ + # + # Sometimes, when you send in a frame, you'll get no + # packets, sometimes you'll get one, and sometimes + # you'll get a batch of several. It depends on how + # the encoder works. + # + # The point is, the packets you're getting back from + # this call are whatever the encoder is ready to give + # you, not necessarily the packets related to the + # frame you're handing it right now. + packets = video_stream.encode(frame) + + # As we said, the MP4 file is a bunch of packets from + # possibly many streams, all woven (or "muxed") + # together. So the ultimate destination of the data + # is to send it to the MP4 file, avmux. + avmux.mux(packets) + + # Print an empty line to end our line of dots. + print() + + # Earlier, we mentioned that the encoder might hold onto + # some frames, while it decides how to encode them based + # on future frames. Now that we're done sending it + # frames, we need to get the packets for any frames it's + # still holding onto. We do this by sending None instead + # of a frame object. + packets = video_stream.encode(None) + avmux.mux(packets) + + print(f"Capture complete: {frame_count / CAPTURE_SECONDS:.1f} fps") + + +if __name__ == "__main__": + main() From 76d3b853725e0c5c88619021c4eec5111f73e049 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sun, 18 Jan 2026 17:50:00 -0800 Subject: [PATCH 10/16] Add information about VFR --- demos/video-capture.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index 979bebe..5363bfd 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -75,6 +75,35 @@ # # This demo uses a time base of 1/90000 (a common MPEG-derived choice). # +# Constant Frame Rate (CFR) and Variable Frame Rate (VFR) +# ------------------------------------------------------- +# +# Many video files run at a fixed frame rate, like 30 fps. Each frame +# is shown at 1/30 sec intervals. This is called *constant frame +# rate*, or *CFR*, and that's what we do in the simple version of this +# demo. +# +# One problem with this is that, if the encoder can't keep up, the +# video will appear sped-up when played back. The comments at the +# beginning of the simple version of this demo go into more detail +# about that problem. +# +# In this advanced version, we use *variable frame rate*, or *VFR*. +# That's because we can't be sure that the encoder will be able to +# work fast enough: we haven't tuned its settings for your screen +# resolution and hardware. 
While the encoder might be fast enough, it +# might only be able to operate at 18 fps, or even less. +# +# Instead, we mark each frame with the correct time that it should be +# shown. Even if the encoder is falling behind, its frames are still +# marked with the right times, so the player will just keep the +# previous frame on the screen a little longer. +# +# Some video editing software historically has had problems with VFR +# video. It's much better now than it was a few years ago, but if you +# plan to edit the video, you may need to convert it to CFR. There +# are many resources online about how to do that. +# # Performance (why multiple threads?) # ---------------------------------- # @@ -128,7 +157,8 @@ # The "medium" preset is as good of a preset as any for a demo # like this. Different codecs have different presets; the # h264_nvenc actually prefers "p4", but accepts "medium" as a - # similar preset. + # similar preset. You might prefer "fast" if you're not getting + # enough FPS. "preset": "medium", # 6 Mbit/sec is vaguely the ballpark for a good-quality video at # 1080p and 30 fps, but there's a lot of variation. We're just From 44d795bcf876787d23558c193367eff4a3aaea3a Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Tue, 20 Jan 2026 13:25:40 -0800 Subject: [PATCH 11/16] Add a comment about the term "flushing" the video stream --- demos/video-capture-simple.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/demos/video-capture-simple.py b/demos/video-capture-simple.py index aa17a8d..0396ba6 100755 --- a/demos/video-capture-simple.py +++ b/demos/video-capture-simple.py @@ -213,8 +213,9 @@ def main() -> None: # some frames, while it decides how to encode them based # on future frames. Now that we're done sending it # frames, we need to get the packets for any frames it's - # still holding onto. We do this by sending None instead - # of a frame object. + # still holding onto. This is referred to as "flushing" + # the stream. We do this by sending None instead of a + # frame object. packets = video_stream.encode(None) avmux.mux(packets) From 06b88850157498ed071a7825f9226a74b8cd209b Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Tue, 20 Jan 2026 15:45:41 -0800 Subject: [PATCH 12/16] Add comments about installing third-party libs with pip --- demos/video-capture-simple.py | 1 + demos/video-capture.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/demos/video-capture-simple.py b/demos/video-capture-simple.py index 0396ba6..2292279 100755 --- a/demos/video-capture-simple.py +++ b/demos/video-capture-simple.py @@ -42,6 +42,7 @@ import logging import time +# Install the necessary libraries with "pip install av mss pillow". import av from PIL import Image diff --git a/demos/video-capture.py b/demos/video-capture.py index 5363bfd..813f920 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -10,6 +10,13 @@ # What tools are we using? # ------------------------ # +# You'll need a few libraries that don't come with Python: PyAV, +# NumPy, SI-Prefix, and (of course) MSS. You'll need to install those +# with "pip install av mss numpy si-prefix". Normally, you'll want to +# install these into a venv; if you don't know about those, there are +# lots of great tutorials online. The most critical one we use is +# PyAV. +# # Most people first meet video encoding through the `ffmpeg` command. # Under the hood, ffmpeg is built on the "libav*" C libraries. 
In # this demo we use PyAV (`import av`), which is a Pythonic wrapper @@ -134,6 +141,7 @@ from threading import Event from typing import Any +# Install the necessary libraries with "pip install av mss numpy si-prefix". import av import numpy as np from si_prefix import si_format From 974b822e1430b3ad1b4b8e6014ccf1cf5ff3c6d7 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Tue, 20 Jan 2026 22:44:40 -0800 Subject: [PATCH 13/16] Reformat to 120-wide project standard --- demos/video-capture-simple.py | 193 +++++--------- demos/video-capture.py | 468 +++++++++++++--------------------- 2 files changed, 244 insertions(+), 417 deletions(-) diff --git a/demos/video-capture-simple.py b/demos/video-capture-simple.py index 2292279..086dbaa 100755 --- a/demos/video-capture-simple.py +++ b/demos/video-capture-simple.py @@ -1,43 +1,31 @@ #! /usr/bin/env python3 -# A lot of people want to use MSS to record a video of the screen. -# Doing it really well can be difficult - there's a reason OBS is such -# a significant program - but the basics are surprisingly easy! +# A lot of people want to use MSS to record a video of the screen. Doing it really well can be difficult - there's a +# reason OBS is such a significant program - but the basics are surprisingly easy! # -# There's a more advanced example, video-capture-stream.py, that has -# more features, and better performance. But this simple demo is -# easier to understand, because it does everything in a -# straightforward way, without any complicated features. +# There's a more advanced example, video-capture.py, that has more features, and better performance. But this simple +# demo is easier to understand, because it does everything in a straightforward way, without any complicated features. # -# Here, we're going to record the screen for 10 seconds, and save the -# result in capture.mp4, as an H.264 video stream. +# Here, we're going to record the screen for 10 seconds, and save the result in capture.mp4, as an H.264 video stream. # -# Sometimes, in film, cameramen will "undercrank", filming the action -# at a slower frame rate than how it will eventually be projected. In -# that case, motion appears artificially sped up, either for comedy -# (like the Benny Hill TV show), or for fast and frenetic action (like -# Mad Max: Fury Road). +# Sometimes, in film, cameramen will "undercrank", filming the action at a slower frame rate than how it will +# eventually be projected. In that case, motion appears artificially sped up, either for comedy (like the Benny Hill +# TV show), or for fast and frenetic action (like Mad Max: Fury Road). # -# In this demo, we put in the file a marker saying that it's at 30 -# fps. But since this is a simple demo, your computer might not be -# able to keep up with writing video frames at that speed. In that -# case, you'll see the same effect: sped-up motion. +# In this demo, we put in the file a marker saying that it's at 30 fps. But since this is a simple demo, your +# computer might not be able to keep up with writing video frames at that speed. In that case, you'll see the same +# effect: sped-up motion. # -# The advanced demo has several techniques to mitigate that. First, -# it uses pipelined threads to let the video encoder use a full CPU -# core (often more, internally), rather than having to share a CPU -# core with all the other tasks. Second, it puts a timestamp marker -# on each frame saying exactly when it's supposed to be shown, rather -# than just saying to show all the frames at 30 fps. 
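To put a number on that sped-up effect: if the file claims 30 fps but the machine only managed to write frames at, say, 18 fps, the recording plays back faster than real time. A back-of-the-envelope sketch (18 fps is just an example figure):

    TAGGED_FPS = 30      # what the file header claims
    ACHIEVED_FPS = 18    # what the machine actually managed (example value)

    speedup = TAGGED_FPS / ACHIEVED_FPS
    print(f"playback runs at {speedup:.2f}x real time")            # 1.67x
    print(f"10 s of real time plays back in {10 / speedup:.1f} s")  # 6.0 s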
+# The full demo has several techniques to mitigate that. First, it uses pipelined threads to let the video encoder +# use a full CPU core (often more, internally), rather than having to share a CPU core with all the other tasks. +# Second, it puts a timestamp marker on each frame saying exactly when it's supposed to be shown, rather than just +# saying to show all the frames at 30 fps. # -# For this simple demo, though, we just record the frames and add them -# to the file one at a time. +# For this simple demo, though, we just record the frames and add them to the file one at a time. # -# We use three libraries that don't come with Python: Pillow, PyAV, -# and (of course) MSS. You'll need to install those with "pip install -# pillow av mss". Normally, you'll want to install these into a venv; -# if you don't know about those, there are lots of great tutorials -# online. +# We use three libraries that don't come with Python: Pillow, PyAV, and (of course) MSS. You'll need to install those +# with "pip install pillow av mss". Normally, you'll want to install these into a venv; if you don't know about +# those, there are lots of great tutorials online. import logging import time @@ -48,9 +36,8 @@ import mss -# These are the options you'd give to ffmpeg that would affect the way -# the video is encoded. There are comments in the advanced demo that -# go into more detail. +# These are the options you'd give to ffmpeg that would affect the way the video is encoded. There are comments in +# the full demo that go into more detail. CODEC_OPTIONS = { "profile": "high", "preset": "medium", @@ -58,23 +45,19 @@ "rc-lookahead": "40", } -# We'll try to capture at 30 fps, if the system can keep up with it -# (typically, that's possible at 1080p, but not at 4k). Regardless of -# what the system can keep up with, we'll mark the file as being at 30 -# fps. +# We'll try to capture at 30 fps, if the system can keep up with it (typically, that's possible at 1080p, but not at +# 4k). Regardless of what the system can keep up with, we'll mark the file as being at 30 fps. FPS = 30 # The program will exit after 10 seconds of recording. CAPTURE_SECONDS = 10 -# Within an MP4 file, the video can be stored in a lot of different -# formats. In this demo, we use H.264, since it's the most widely -# supported. +# Within an MP4 file, the video can be stored in a lot of different formats. In this demo, we use H.264, since it's +# the most widely supported. # -# In ffmpeg, and the av libraries that we use here, the best codec for -# H.264 that doesn't require any specific hardware is libx264. There -# are faster ones that are hardware-accelerated, such as h264_nvenc -# which uses specialized chips on Nvidia video cards. +# In ffmpeg, and the av libraries that we use here, the best codec for H.264 that doesn't require any specific +# hardware is libx264. There are faster ones that are hardware-accelerated, such as h264_nvenc which uses specialized +# chips on Nvidia video cards. CODEC = "libx264" FILENAME = "capture.mp4" @@ -82,48 +65,35 @@ def main() -> None: logging.basicConfig(level=logging.DEBUG) - # If we don't enable PyAV's own logging, a lot of important error - # messages from libav won't be shown. + # If we don't enable PyAV's own logging, a lot of important error messages from libav won't be shown. av.logging.set_level(av.logging.VERBOSE) with mss.mss() as sct: monitor = sct.monitors[1] with av.open(FILENAME, "w") as avmux: - # The "avmux" object we get back from "av.open" represents - # the MP4 file. 
That's a container that holds the video, - # as well as possibly audio and more. These are each - # called "streams". We only create one stream here, since - # we're just recording video. - video_stream = avmux.add_stream( - CODEC, rate=FPS, options=CODEC_OPTIONS - ) + # The "avmux" object we get back from "av.open" represents the MP4 file. That's a container that holds + # the video, as well as possibly audio and more. These are each called "streams". We only create one + # stream here, since we're just recording video. + video_stream = avmux.add_stream(CODEC, rate=FPS, options=CODEC_OPTIONS) video_stream.width = monitor["width"] video_stream.height = monitor["height"] - # There are more options you can set on the video stream; - # the advanced demo uses some of those. + # There are more options you can set on the video stream; the full demo uses some of those. - # Count how many frames we're capturing, so we can log - # the FPS later. + # Count how many frames we're capturing, so we can log the FPS later. frame_count = 0 # Mark the times when we start and end the recording. capture_start_time = time.monotonic() capture_end_time = capture_start_time + CAPTURE_SECONDS - # MSS can capture very fast, and libav can encode very - # fast, depending on your hardware and screen size. We - # don't want to capture faster than 30 fps (or whatever - # you set FPS to). To slow down to our desired rate, we - # keep a variable "next_frame_time" to track when it's - # time to track the next frame. + # MSS can capture very fast, and libav can encode very fast, depending on your hardware and screen size. + # We don't want to capture faster than 30 fps (or whatever you set FPS to). To slow down to our desired + # rate, we keep a variable "next_frame_time" to track when it's time to track the next frame. # - # Some programs will just sleep for 1/30 sec in each loop. - # But by tracking the time when we want to capture the - # next frame, instead of always sleeping for 1/30 sec, the - # time that is spent doing the capture and encode (which - # can be substantial) is counted as part of the total time - # we need to delay. + # Some programs will just sleep for 1/30 sec in each loop. But by tracking the time when we want to + # capture the next frame, instead of always sleeping for 1/30 sec, the time that is spent doing the + # capture and encode (which can be substantial) is counted as part of the total time we need to delay. next_frame_time = capture_start_time print("Capturing to", FILENAME, "for", CAPTURE_SECONDS, "seconds") @@ -132,57 +102,39 @@ def main() -> None: while (now := time.monotonic()) < next_frame_time: time.sleep(next_frame_time - now) - # Try to capture the next frame 1/30 sec after our - # target time for this frame. We update this based on - # the target time instead of the actual time so that, - # if we were a little slow capturing this frame, we'll - # be a little fast capturing the next one, and even - # things out. (There's a slightly better, but more - # complex, way to update next_frame_time in the - # advanced demo.) + # Try to capture the next frame 1/30 sec after our target time for this frame. We update this based + # on the target time instead of the actual time so that, if we were a little slow capturing this + # frame, we'll be a little fast capturing the next one, and even things out. (There's a slightly + # better, but more complex, way to update next_frame_time in the full demo.) 
next_frame_time = next_frame_time + 1 / FPS - # See if we've finished the requested capture - # duration. + # See if we've finished the requested capture duration. if now > capture_end_time: break - # Print dots for each frame, so you know it's not - # frozen. + # Print dots for each frame, so you know it's not frozen. print(".", end="", flush=True) # Grab a screenshot. screenshot = sct.grab(monitor) frame_count += 1 - # There are a few ways to get the screenshot into a - # VideoFrame. The highest-performance way isn't hard, - # and is shown in the advanced demo: search for - # from_numpy_buffer. But the most obvious way is to - # use PIL: you can create an Image from the - # screenshot, and create a VideoFrame from that. That - # said, if you want to boost the fps rate by about - # 50%, check out the advanced demo, and search for + # There are a few ways to get the screenshot into a VideoFrame. The highest-performance way isn't + # hard, and is shown in the full demo: search for from_numpy_buffer. But the most obvious way is to + # use PIL: you can create an Image from the screenshot, and create a VideoFrame from that. That said, + # if you want to boost the fps rate by about 50%, check out the full demo, and search for # from_numpy_buffer. - img = Image.frombytes( - "RGB", screenshot.size, screenshot.bgra, "raw", "BGRX" - ) + img = Image.frombytes("RGB", screenshot.size, screenshot.bgra, "raw", "BGRX") frame = av.VideoFrame.from_image(img) - # When we encode frames, we get back a list of - # packets. Often, we'll get no packets at first: the - # video encoder wants to wait and see the motion - # before it decides how it wants to encode the frames. - # Later, once it's decided about the earlier frames, - # we'll start getting those packets, while it's + # When we encode frames, we get back a list of packets. Often, we'll get no packets at first: the + # video encoder wants to wait and see the motion before it decides how it wants to encode the frames. + # Later, once it's decided about the earlier frames, we'll start getting those packets, while it's # holding on to later frames. # - # You can imagine that the encoder is a factory. - # You're providing it frames, one at a time, each as a - # box of raw materials. It cranks out packets as its - # finished product. But there's some delay while it's - # working. You can imagine these on a conveyor belt - # moving left to right as time progresses: + # You can imagine that the encoder is a factory. You're providing it frames, one at a time, each as a + # box of raw materials. It cranks out packets as its finished product. But there's some delay while + # it's working. You can imagine these on a conveyor belt moving left to right as time progresses: # # FRAMES ENCODER PACKETS # [1]________-> (Factory) ->____________ @@ -190,33 +142,24 @@ def main() -> None: # [6]_[5]_[4]-> (Factory) ->{1}_________ # [8]_[7]_[6]-> (Factory) ->{3}_{2}_{1}_ # - # Sometimes, when you send in a frame, you'll get no - # packets, sometimes you'll get one, and sometimes - # you'll get a batch of several. It depends on how - # the encoder works. + # Sometimes, when you send in a frame, you'll get no packets, sometimes you'll get one, and sometimes + # you'll get a batch of several. It depends on how the encoder works. # - # The point is, the packets you're getting back from - # this call are whatever the encoder is ready to give - # you, not necessarily the packets related to the - # frame you're handing it right now. 
+ # The point is, the packets you're getting back from this call are whatever the encoder is ready to + # give you, not necessarily the packets related to the frame you're handing it right now. packets = video_stream.encode(frame) - # As we said, the MP4 file is a bunch of packets from - # possibly many streams, all woven (or "muxed") - # together. So the ultimate destination of the data - # is to send it to the MP4 file, avmux. + # As we said, the MP4 file is a bunch of packets from possibly many streams, all woven (or "muxed") + # together. So the ultimate destination of the data is to send it to the MP4 file, avmux. avmux.mux(packets) # Print an empty line to end our line of dots. print() - # Earlier, we mentioned that the encoder might hold onto - # some frames, while it decides how to encode them based - # on future frames. Now that we're done sending it - # frames, we need to get the packets for any frames it's - # still holding onto. This is referred to as "flushing" - # the stream. We do this by sending None instead of a - # frame object. + # Earlier, we mentioned that the encoder might hold onto some frames, while it decides how to encode them + # based on future frames. Now that we're done sending it frames, we need to get the packets for any + # frames it's still holding onto. This is referred to as "flushing" the stream. We do this by sending + # None instead of a frame object. packets = video_stream.encode(None) avmux.mux(packets) diff --git a/demos/video-capture.py b/demos/video-capture.py index 813f920..098d9cb 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -1,26 +1,20 @@ #! /usr/bin/env python3 -# This demo shows one common use case for MSS: capture the screen and -# write a real video file (MP4) rather than saving individual images. +# This demo shows one common use case for MSS: capture the screen and write a real video file (MP4) rather than saving +# individual images. # -# It's intentionally not a full "video encoding" course. The goal is -# to explain the few concepts that show up throughout the program so -# you can read, tweak, and extend it. +# It's intentionally not a full "video encoding" course. The goal is to explain the few concepts that show up +# throughout the program so you can read, tweak, and extend it. # # What tools are we using? # ------------------------ # -# You'll need a few libraries that don't come with Python: PyAV, -# NumPy, SI-Prefix, and (of course) MSS. You'll need to install those -# with "pip install av mss numpy si-prefix". Normally, you'll want to -# install these into a venv; if you don't know about those, there are -# lots of great tutorials online. The most critical one we use is -# PyAV. +# You'll need a few libraries that don't come with Python: PyAV, NumPy, SI-Prefix, and (of course) MSS. You'll need +# to install those with "pip install av mss numpy si-prefix". Normally, you'll want to install these into a venv; if +# you don't know about those, there are lots of great tutorials online. The most critical one we use is PyAV. # -# Most people first meet video encoding through the `ffmpeg` command. -# Under the hood, ffmpeg is built on the "libav*" C libraries. In -# this demo we use PyAV (`import av`), which is a Pythonic wrapper -# around those libraries. +# Most people first meet video encoding through the `ffmpeg` command. Under the hood, ffmpeg is built on the "libav*" +# C libraries. In this demo we use PyAV (`import av`), which is a Pythonic wrapper around those libraries. 
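To see the "factory" behaviour described above in isolation, here is a small self-contained sketch (frame size, codec options, and the output name are arbitrary): it encodes a handful of synthetic frames and prints how many packets each encode() call returns. Typically the first few calls return nothing, and the flush at the end returns a burst.

    import av
    import numpy as np

    with av.open("delay-demo.mp4", "w") as container:
        stream = container.add_stream("libx264", rate=30, options={"bf": "2"})
        stream.width, stream.height = 320, 240
        for i in range(10):
            # Random noise stands in for a real screenshot.
            img = np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8)
            frame = av.VideoFrame.from_ndarray(img, format="rgb24")
            packets = stream.encode(frame)
            print(f"frame {i}: {len(packets)} packet(s)")
            container.mux(packets)
        tail = stream.encode(None)  # flush whatever the encoder is still holding
        print(f"flush: {len(tail)} packet(s)")
        container.mux(tail)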
# # PyAV docs: # Note: the older docs at pyav.org are outdated; see @@ -30,19 +24,16 @@ # Containers, streams, and codecs # ------------------------------- # -# A file like `capture.mp4` is a *container*: it holds one or more -# *streams* (usually video and/or audio). This demo writes one video -# stream. +# A file like `capture.mp4` is a *container*: it holds one or more *streams* (usually video and/or audio). This demo +# writes one video stream. # -# The container interleaves ("muxes") stream data so players can read -# everything in timestamp order. libav calls those pieces "packets". -# (In MP4 they're not literally network-style packets; the term is a -# longstanding libav abstraction.) +# The container interleaves ("muxes") stream data so players can read everything in timestamp order. libav calls those +# pieces "packets". (In MP4 they're not literally network-style packets; the term is a longstanding libav +# abstraction.) # -# A *codec* is the algorithm that compresses/decompresses a stream. -# For MP4 video, common codecs include H.264 and H.265. This demo -# defaults to H.264 via `libx264`, because it's widely supported. You -# can switch to hardware encoders (e.g. `h264_nvenc`) if available. +# A *codec* is the algorithm that compresses/decompresses a stream. For MP4 video, common codecs include H.264 and +# H.265. This demo defaults to H.264 via `libx264`, because it's widely supported. You can switch to hardware +# encoders (e.g. `h264_nvenc`) if available. # # Frames and frame reordering (I/P/B) # ---------------------------------- @@ -52,31 +43,25 @@ # - P-frames: changes from previous frames. # - B-frames: changes predicted using both past *and future* frames. # -# B-frames are why "the order frames are encoded/decoded" can differ -# from "the order frames are shown". That leads directly to -# timestamps. +# B-frames are why "the order frames are encoded/decoded" can differ from "the order frames are shown". That leads +# directly to timestamps. # # Timestamps (PTS/DTS) # -------------------- # -# Every frame has a *presentation timestamp* (PTS): when the viewer -# should see it. +# Every frame has a *presentation timestamp* (PTS): when the viewer should see it. # -# Encoders may output packets in a different order due to B-frames. -# Those packets also have a *decode timestamp* (DTS): when the decoder -# must decode them so the PTS schedule can be met. +# Encoders may output packets in a different order due to B-frames. Those packets also have a *decode timestamp* +# (DTS): when the decoder must decode them so the PTS schedule can be met. # -# In this demo we set PTS on `VideoFrame`s and let libav/PyAV -# propagate timestamps into the encoded packets. +# In this demo we set PTS on `VideoFrame`s and let libav/PyAV propagate timestamps into the encoded packets. # # Time base # --------- # -# Timestamps are integers, and their unit is a fraction of a second -# called the *time base*. For example, with a time base of 1/90000, a -# timestamp of 90000 means "1 second". PyAV will convert between time -# bases when needed, but you must set them consistently where you -# generate timestamps. +# Timestamps are integers, and their unit is a fraction of a second called the *time base*. For example, with a time +# base of 1/90000, a timestamp of 90000 means "1 second". PyAV will convert between time bases when needed, but you +# must set them consistently where you generate timestamps. 
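The arithmetic is simple enough to show directly. A small sketch of converting between seconds and ticks with the 1/90000 time base used in this demo:

    from fractions import Fraction

    TIME_BASE = Fraction(1, 90000)  # one tick = 1/90000 s

    def seconds_to_pts(seconds: float) -> int:
        return int(seconds / TIME_BASE)   # 1.0 s -> 90000 ticks

    def pts_to_seconds(pts: int) -> float:
        return float(pts * TIME_BASE)     # 45000 ticks -> 0.5 s

    assert seconds_to_pts(1.0) == 90000
    assert pts_to_seconds(45000) == 0.5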
# # See # @@ -85,43 +70,32 @@ # Constant Frame Rate (CFR) and Variable Frame Rate (VFR) # ------------------------------------------------------- # -# Many video files run at a fixed frame rate, like 30 fps. Each frame -# is shown at 1/30 sec intervals. This is called *constant frame -# rate*, or *CFR*, and that's what we do in the simple version of this -# demo. +# Many video files run at a fixed frame rate, like 30 fps. Each frame is shown at 1/30 sec intervals. This is called +# *constant frame rate*, or *CFR*, and that's what we do in the simple version of this demo. # -# One problem with this is that, if the encoder can't keep up, the -# video will appear sped-up when played back. The comments at the -# beginning of the simple version of this demo go into more detail -# about that problem. +# One problem with this is that, if the encoder can't keep up, the video will appear sped-up when played back. The +# comments at the beginning of the simple version of this demo go into more detail about that problem. # -# In this advanced version, we use *variable frame rate*, or *VFR*. -# That's because we can't be sure that the encoder will be able to -# work fast enough: we haven't tuned its settings for your screen -# resolution and hardware. While the encoder might be fast enough, it -# might only be able to operate at 18 fps, or even less. +# In this advanced version, we use *variable frame rate*, or *VFR*. That's because we can't be sure that the encoder +# will be able to work fast enough: we haven't tuned its settings for your screen resolution and hardware. While the +# encoder might be fast enough, it might only be able to operate at 18 fps, or even less. # -# Instead, we mark each frame with the correct time that it should be -# shown. Even if the encoder is falling behind, its frames are still -# marked with the right times, so the player will just keep the -# previous frame on the screen a little longer. +# Instead, we mark each frame with the correct time that it should be shown. Even if the encoder is falling behind, +# its frames are still marked with the right times, so the player will just keep the previous frame on the screen a +# little longer. # -# Some video editing software historically has had problems with VFR -# video. It's much better now than it was a few years ago, but if you -# plan to edit the video, you may need to convert it to CFR. There -# are many resources online about how to do that. +# Some video editing software historically has had problems with VFR video. It's much better now than it was a few +# years ago, but if you plan to edit the video, you may need to convert it to CFR. There are many resources online +# about how to do that. # # Performance (why multiple threads?) # ---------------------------------- # -# Capturing frames, converting them to `VideoFrame`s, encoding, and -# muxing are separate stages. This demo pipelines those stages across -# threads so that (for example) encoding can run while the next screen -# grab is happening. The comments at the top of common/pipeline.py -# describe pipelining in detail. +# Capturing frames, converting them to `VideoFrame`s, encoding, and muxing are separate stages. This demo pipelines +# those stages across threads so that (for example) encoding can run while the next screen grab is happening. The +# comments at the top of common/pipeline.py describe pipelining in detail. # -# The slowest stage typically limits overall FPS. Usually, that's the -# encoder. +# The slowest stage typically limits overall FPS. 
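The general shape of that pipelining, independent of this project's common/pipeline.py helpers, can be sketched with just the standard library: a producer thread and a consumer joined by a small bounded queue, so both can work at the same time.

    import queue
    import threading

    def produce(q: queue.Queue) -> None:
        for i in range(10):
            q.put(i)      # stand-in for "grab a screenshot"
        q.put(None)       # sentinel: no more work

    def consume(q: queue.Queue) -> None:
        while (item := q.get()) is not None:
            print("encoding item", item)  # stand-in for "encode a frame"

    q = queue.Queue(maxsize=2)  # a small buffer keeps the stages in step
    producer = threading.Thread(target=produce, args=(q,))
    producer.start()
    consume(q)
    producer.join()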
Usually, that's the encoder. # # On an idle system (rough guide; will vary widely): # - libx264, 1920x1080: ~80 fps @@ -150,42 +124,33 @@ from common.pipeline import Mailbox, PipelineStage -# These are the options you'd give to ffmpeg that it sends to the -# video codec. The options you can use here can be listed with -# `ffmpeg -help encoder=libx264`, or whatever encoder you're using for -# this demo's `--codec` flag. The options for each encoder are described -# in more detail in `man ffmpeg-codecs`. +# These are the options you'd give to ffmpeg that it sends to the video codec. The options you can use here can be +# listed with `ffmpeg -help encoder=libx264`, or whatever encoder you're using for this demo's `--codec` flag. The +# options for each encoder are described in more detail in `man ffmpeg-codecs`. CODEC_OPTIONS = { - # The "high" profile means that the encoder can use some H.264 - # features that are widely supported, but not mandatory. If - # you're using a codec other than H.264, you'll need to comment - # out this line: the relevant features are already part of the - # main profile in later codecs like H.265, VP8, VP9, and AV1. + # The "high" profile means that the encoder can use some H.264 features that are widely supported, but not + # mandatory. If you're using a codec other than H.264, you'll need to comment out this line: the relevant + # features are already part of the main profile in later codecs like H.265, VP8, VP9, and AV1. "profile": "high", - # The "medium" preset is as good of a preset as any for a demo - # like this. Different codecs have different presets; the - # h264_nvenc actually prefers "p4", but accepts "medium" as a - # similar preset. You might prefer "fast" if you're not getting - # enough FPS. + # The "medium" preset is as good of a preset as any for a demo like this. Different codecs have different + # presets; the h264_nvenc actually prefers "p4", but accepts "medium" as a similar preset. You might prefer + # "fast" if you're not getting enough FPS. "preset": "medium", - # 6 Mbit/sec is vaguely the ballpark for a good-quality video at - # 1080p and 30 fps, but there's a lot of variation. We're just - # giving the target bitrate: the second-to-second bitrate will - # vary a lot, and slowly approach this bitrate. If you're trying - # this on a nearly-still screen, though, then the actual bitrate - # will be much lower, since there's not much motion to encode! + # 6 Mbit/sec is vaguely the ballpark for a good-quality video at 1080p and 30 fps, but there's a lot of variation. + # We're just giving the target bitrate: the second-to-second bitrate will vary a lot, and slowly approach this + # bitrate. If you're trying this on a nearly-still screen, though, then the actual bitrate will be much lower, + # since there's not much motion to encode! "b": "6M", - # Let the encoder hold some frames for analysis, and flush them - # later. This especially helps with the hardware-accelerated - # codecs. + # Let the encoder hold some frames for analysis, and flush them later. This especially helps with the + # hardware-accelerated codecs. "rc-lookahead": "40", } TIME_BASE = Fraction(1, 90000) -# Currently, MSS doesn't give us information about the display's -# colorspace. See where this is used below for more information. +# Currently, MSS doesn't give us information about the display's colorspace. See where this is used below for more +# information. 
DISPLAY_IS_SRGB = False LOGGER = logging.getLogger("video-capture") @@ -197,22 +162,17 @@ def video_capture( monitor: mss.models.Monitor, shutdown_requested: Event, ) -> Generator[tuple[mss.screenshot.ScreenShot, float], None, None]: - # Keep track of the time when we want to get the next frame. We - # limit the frame time this way instead of sleeping 1/fps sec each - # frame, since we want to also account for the time taken to get - # the screenshot and other overhead. + # Keep track of the time when we want to get the next frame. We limit the frame time this way instead of sleeping + # 1/fps sec each frame, since we want to also account for the time taken to get the screenshot and other overhead. # - # Repeatedly adding small floating-point numbers to a total does - # cause some numeric inaccuracies, but it's small enough for our - # purposes. The program would have to run for three months to - # accumulate one millisecond of inaccuracy. + # Repeatedly adding small floating-point numbers to a total does cause some numeric inaccuracies, but it's small + # enough for our purposes. The program would have to run for three months to accumulate one millisecond of + # inaccuracy. next_frame_at = time.monotonic() - # Keep running this loop until the main thread says we should - # stop. + # Keep running this loop until the main thread says we should stop. while not shutdown_requested.is_set(): - # Wait until we're ready. This should, ideally, happen every - # 1/fps second. + # Wait until we're ready. This should, ideally, happen every 1/fps second. while (now := time.monotonic()) < next_frame_at: time.sleep(next_frame_at - now) @@ -220,23 +180,16 @@ def video_capture( screenshot = sct.grab(monitor) yield screenshot, now - # We try to keep the capture rate at the desired fps on - # average. If we can't quite keep up for a moment (such as if - # the computer is a little overloaded), then we'll accumulate - # a bit of "timing debt" in next_frame_at: it'll be a little - # sooner than now + one frame. We'll hopefully be able to - # catch up soon. + # We try to keep the capture rate at the desired fps on average. If we can't quite keep up for a moment (such + # as if the computer is a little overloaded), then we'll accumulate a bit of "timing debt" in next_frame_at: + # it'll be a little sooner than now + one frame. We'll hopefully be able to catch up soon. next_frame_at = next_frame_at + (1 / fps) - # If we've accumulated over one frame's worth of timing debt, - # then that will say that next_frame_at is sooner than now. - # If we're accumulating too much debt, we want to wipe it out, - # rather than having a huge burst of closely-spaced captures - # as soon as we can get back to our desired capture rate. - # When we wipe that out, we still try to preserve the timing - # cycle's phase to keep the capture cadence smooth, rather - # than having a jittery burst of closely-spaced captures. In - # other words, we increment next_frame_at by a multiple of the + # If we've accumulated over one frame's worth of timing debt, then that will say that next_frame_at is sooner + # than now. If we're accumulating too much debt, we want to wipe it out, rather than having a huge burst of + # closely-spaced captures as soon as we can get back to our desired capture rate. When we wipe that out, we + # still try to preserve the timing cycle's phase to keep the capture cadence smooth, rather than having a + # jittery burst of closely-spaced captures. 
In other words, we increment next_frame_at by a multiple of the # desired capture period. if next_frame_at < now: missed_frames = floor((now - next_frame_at) * fps) @@ -244,51 +197,42 @@ def video_capture( def video_process( - screenshot_and_timestamp: Iterable[ - tuple[mss.screenshot.ScreenShot, float] - ], + screenshot_and_timestamp: Iterable[tuple[mss.screenshot.ScreenShot, float]], ) -> Generator[av.VideoFrame, None, None]: - # We track when the first frame happened so we can make PTS start - # at 0. Many video players and other tools expect that. + # We track when the first frame happened so we can make PTS start at 0. Many video players and other tools expect + # that. first_frame_at: float | None = None for screenshot, timestamp in screenshot_and_timestamp: # Avoiding extra pixel copies # --------------------------- # - # Copying a full frame of pixels is expensive. On typical - # hardware, a plain CPU memcpy of a 4K BGRA image can cost on - # the order of ~3ms by itself, which is a big chunk of a 30fps - # budget (33ms) and an even bigger chunk of a 60fps budget - # (16.7ms). + # Copying a full frame of pixels is expensive. On typical hardware, a plain CPU memcpy of a 4K BGRA image can + # cost on the order of ~3ms by itself, which is a big chunk of a 30fps budget (33ms) and an even bigger chunk + # of a 60fps budget (16.7ms). # - # So we want to be careful about the *conversion* step from an - # MSS `ScreenShot` to a PyAV `VideoFrame`. Ideally, that step - # should reuse the same underlying bytes rather than creating - # additional intermediate copies. + # So we want to be careful about the *conversion* step from an MSS `ScreenShot` to a PyAV `VideoFrame`. + # Ideally, that step should reuse the same underlying bytes rather than creating additional intermediate + # copies. # # Buffers in Python # ----------------- # - # Many Python objects expose their underlying memory via the - # "buffer protocol". A buffer is just a view of raw bytes - # that other libraries can interpret without copying. + # Many Python objects expose their underlying memory via the "buffer protocol". A buffer is just a view of + # raw bytes that other libraries can interpret without copying. # - # Common buffer objects include: `bytes`, `bytearray`, - # `memoryview`, and `array.array`. `screenshot.bgra` is also - # a buffer (currently it is a `bytes` object, though that - # detail may change in the future). + # Common buffer objects include: `bytes`, `bytearray`, `memoryview`, and `array.array`. `screenshot.bgra` is + # also a buffer (currently it is a `bytes` object, though that detail may change in the future). # # Minimum-copy path: ScreenShot -> NumPy -> VideoFrame # ---------------------------------------------------- # - # `np.frombuffer()` creates an ndarray *view* of an existing - # buffer (no copy). Reshaping also stays as a view. + # `np.frombuffer()` creates an ndarray *view* of an existing buffer (no copy). Reshaping also stays as a + # view. # - # PyAV's `VideoFrame.from_ndarray()` always copies the data - # into a new frame-owned buffer. For this demo we use the - # undocumented `VideoFrame.from_numpy_buffer()`, which creates - # a `VideoFrame` that shares memory with the ndarray. + # PyAV's `VideoFrame.from_ndarray()` always copies the data into a new frame-owned buffer. For this demo we + # use the undocumented `VideoFrame.from_numpy_buffer()`, which creates a `VideoFrame` that shares memory with + # the ndarray. 
ndarray = np.frombuffer(screenshot.bgra, dtype=np.uint8) ndarray = ndarray.reshape(screenshot.height, screenshot.width, 4) frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") @@ -299,9 +243,8 @@ def video_process( frame.pts = int((timestamp - first_frame_at) / TIME_BASE) frame.time_base = TIME_BASE - # If we know the colorspace of our frames, mark them - # accordingly. See the comment where we set these attributes - # on video_stream for details. + # If we know the colorspace of our frames, mark them accordingly. See the comment where we set these + # attributes on video_stream for details. if DISPLAY_IS_SRGB: frame.colorspace = av.video.reformatter.Colorspace.ITU709 frame.color_range = av.video.reformatter.ColorRange.JPEG @@ -314,8 +257,8 @@ def video_encode( ) -> Generator[Sequence[av.Packet], None, None]: for frame in frames: yield video_stream.encode(frame) - # Our input has run out. Flush the frames that the encoder still - # is holding internally (such as to compute B-frames). + # Our input has run out. Flush the frames that the encoder still is holding internally (such as to compute + # B-frames). yield video_stream.encode(None) @@ -329,9 +272,8 @@ def show_stats( FPS indicates how fast the entire pipeline can run as a whole, not any individual stage. """ - # The start time is only used for showing the clock. The actual - # timing stats use packet timestamps (ultimately derived from the - # frame PTS we compute during capture). + # The start time is only used for showing the clock. The actual timing stats use packet timestamps (ultimately + # derived from the frame PTS we compute during capture). start_time = time.monotonic() time_deque: deque[int] = deque(maxlen=100) bit_count_deque: deque[int] = deque(maxlen=100) @@ -339,26 +281,21 @@ def show_stats( last_status_len = 0 for frame_count, packet_batch in enumerate(packet_batches): - # Yield the packet data immediately, so the mux gets it as - # soon as possible, while we update our stats. + # Yield the packet data immediately, so the mux gets it as soon as possible, while we update our stats. yield packet_batch for packet in packet_batch: # FPS from timestamps: why DTS, not PTS? # - # Intuitively, you'd expect to compute FPS from PTS (the - # time the viewer should *see* each frame). But encoders - # can reorder frames internally (especially with - # B-frames), so packets may come out in a different order - # than PTS. + # Intuitively, you'd expect to compute FPS from PTS (the time the viewer should *see* each frame). But + # encoders can reorder frames internally (especially with B-frames), so packets may come out in a + # different order than PTS. # - # If we update a sliding window with out-of-order PTS - # values, the window start/end can "wiggle" even when the - # pipeline is steady, which makes the displayed FPS noisy. + # If we update a sliding window with out-of-order PTS values, the window start/end can "wiggle" even when + # the pipeline is steady, which makes the displayed FPS noisy. # - # DTS is the time order the decoder must process packets. - # Packets are emitted in DTS order, so using DTS gives a - # stable, monotonic timeline for the sliding window. + # DTS is the time order the decoder must process packets. Packets are emitted in DTS order, so using DTS + # gives a stable, monotonic timeline for the sliding window. 
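You can see that reordering for yourself by demuxing a file this demo has written and printing the first few video packets: with B-frames enabled, dts climbs steadily while pts jumps around. (A hedged sketch; "capture.mp4" is just the default output name.)

    import av

    with av.open("capture.mp4") as container:
        stream = container.streams.video[0]
        for i, packet in enumerate(container.demux(stream)):
            if packet.dts is None or packet.pts is None:
                continue  # the trailing flush packet has no timestamps
            print(f"packet {i:3d}: dts={packet.dts:8d} pts={packet.pts:8d}")
            if i >= 20:
                break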
time_deque.append(packet.dts) bit_count = packet.size * 8 bit_count_deque.append(bit_count) @@ -370,8 +307,8 @@ def show_stats( running_minutes = int(running_time / 60) running_seconds = int(running_time % 60) window_secs = (time_deque[-1] - time_deque[0]) * TIME_BASE - # We can't use the last frame in the window when we divide - # by window_secs; that would be a fencepost error. + # We can't use the last frame in the window when we divide by window_secs; that would be a fencepost + # error. window_frames = len(time_deque) - 1 window_bits = sum(bit_count_deque) - bit_count_deque[-1] fps = window_frames / window_secs @@ -385,12 +322,10 @@ def show_stats( full_line = f"\r{line}{' ' * (last_status_len - this_status_len)}" print(full_line, end="") last_status_len = this_status_len - # Near shutdown the encoder flush can emit packets in large - # bursts, and we also throttle status updates (to avoid spamming - # the terminal). That combination means the last displayed line - # may be stale or not representative of the final frames. Rather - # than leaving potentially misleading numbers on screen, erase the - # status display. + + # At shutdown, the encoder flush can emit packets in large bursts, and we also throttle status updates (to avoid + # spamming the terminal). That combination means the last displayed line may be stale or not representative of + # the final frames. Rather than leaving potentially misleading numbers on screen, erase the status display. print(f"\r{' ' * last_status_len}\r", end="") @@ -417,13 +352,10 @@ def parse_region(s: str) -> tuple[int, int, int, int]: def main() -> None: logging.basicConfig(level=logging.DEBUG) - # If we don't enable PyAV's own logging, a lot of important error - # messages from libav won't be shown. + # If we don't enable PyAV's own logging, a lot of important error messages from libav won't be shown. av.logging.set_level(av.logging.VERBOSE) - parser = argparse.ArgumentParser( - description="Capture screen video to MP4 file" - ) + parser = argparse.ArgumentParser(description="Capture screen video to MP4 file") parser.add_argument( "-f", "--fps", @@ -488,103 +420,71 @@ def main() -> None: else: monitor = sct.monitors[args.monitor] - # We don't pass the container format to av.open here, so it - # will choose it based on the extension: .mp4, .mkv, etc. + # We don't pass the container format to av.open here, so it will choose it based on the extension: .mp4, .mkv, + # etc. with av.open(filename, "w") as avmux: - # We could initialize video_stream in video_encode, but - # doing it here means that we can open it before starting - # the capture thread, which avoids a warmup frame (one - # that takes longer to encode because the encoder is just - # starting). + # We could initialize video_stream in video_encode, but doing it here means that we can open it before + # starting the capture thread, which avoids a warmup frame (one that takes longer to encode because the + # encoder is just starting). # - # The rate= parameter here is just the nominal frame rate: - # some tools (like file browsers) might display this as - # the frame rate. But we actually control timing via the - # pts and time_base values on the frames themselves. - video_stream = avmux.add_stream( - codec, rate=fps, options=CODEC_OPTIONS - ) - - # Ideally, we would set attributes such as colorspace, - # color_range, color_primaries, and color_trc here to - # describe the colorspace accurately. 
Otherwise, the - # player has to guess whether this was recorded on an sRGB - # Windows machine, a Display P3 Mac, or if it's using - # linear RGB. Currently, MSS doesn't give us colorspace - # information (DISPLAY_IS_SRGB is always False in this - # demo), so we don't try to specify a particular - # colorspace. However, if your application knows the - # colorspace you're recording from, then you can set those - # attributes on the stream and the frames accordingly. + # The rate= parameter here is just the nominal frame rate: some tools (like file browsers) might display + # this as the frame rate. But we actually control timing via the pts and time_base values on the frames + # themselves. + video_stream = avmux.add_stream(codec, rate=fps, options=CODEC_OPTIONS) + + # Ideally, we would set attributes such as colorspace, color_range, color_primaries, and color_trc here to + # describe the colorspace accurately. Otherwise, the player has to guess whether this was recorded on an + # sRGB Windows machine, a Display P3 Mac, or if it's using linear RGB. Currently, MSS doesn't give us + # colorspace information (DISPLAY_IS_SRGB is always False in this demo), so we don't try to specify a + # particular colorspace. However, if your application knows the colorspace you're recording from, then + # you can set those attributes on the stream and the frames accordingly. # - # These properties on the stream (actually, they're - # attached to its CodecContext) are used to tell the - # stream and container how to label the video stream's - # colorspace. There are similar attributes on the frame - # itself; those are used to identify its colorspace, so - # the codec can do the correct RGB to YUV conversion. + # These properties on the stream (actually, they're attached to its CodecContext) are used to tell the + # stream and container how to label the video stream's colorspace. There are similar attributes on the + # frame itself; those are used to identify its colorspace, so the codec can do the correct RGB to YUV + # conversion. if DISPLAY_IS_SRGB: - # color_primaries=1 is libavutil's AVCOL_PRI_BT709; - # PyAV doesn't define named constants for color + # color_primaries=1 is libavutil's AVCOL_PRI_BT709; PyAV doesn't define named constants for color # primaries. video_stream.color_primaries = 1 - # What PyAV refers to as ITU709 is more commonly known - # as BT.709. - video_stream.colorspace = ( - av.video.reformatter.Colorspace.ITU709 - ) - # The "JPEG" color range is saying that we're using a - # color range like a computer, not like broadcast TV. + # What PyAV refers to as ITU709 is more commonly known as BT.709. + video_stream.colorspace = av.video.reformatter.Colorspace.ITU709 + # The "JPEG" color range is saying that we're using a color range like a computer, not like broadcast + # TV. video_stream.color_range = av.video.reformatter.ColorRange.JPEG - # PyAV doesn't define named constants for TRCs, so we - # pass it a numeric value. Technically, sRGB's - # transformation characteristic is - # AVCOL_TRC_IEC61966_2_1 (13). It's nearly the same - # as BT.709's TRC, so some video encoders will tag it - # as AVCOL_TRC_BT709 (1) instead. + # PyAV doesn't define named constants for TRCs, so we pass it a numeric value. Technically, sRGB's + # transformation characteristic is AVCOL_TRC_IEC61966_2_1 (13). It's nearly the same as BT.709's TRC, + # so some video encoders will tag it as AVCOL_TRC_BT709 (1) instead. 
video_stream.color_trc = 13 video_stream.width = monitor["width"] video_stream.height = monitor["height"] - # There are multiple time bases in play (stream, codec - # context, per-frame). Depending on the container and - # codec, some of these might be ignored or overridden. We - # set the desired time base consistently everywhere, so - # that the saved timestamps are correct regardless of what - # format we're saving to. + # There are multiple time bases in play (stream, codec context, per-frame). Depending on the container + # and codec, some of these might be ignored or overridden. We set the desired time base consistently + # everywhere, so that the saved timestamps are correct regardless of what format we're saving to. video_stream.time_base = TIME_BASE video_stream.codec_context.time_base = TIME_BASE - # `pix_fmt` here describes the pixel format we will *feed* - # into the encoder (not necessarily what the encoder will - # store in the bitstream). H.264 encoders ultimately - # convert to a YUV format internally. + # `pix_fmt` here describes the pixel format we will *feed* into the encoder (not necessarily what the + # encoder will store in the bitstream). H.264 encoders ultimately convert to a YUV format internally. # - # If the encoder accepts BGRA input (e.g., h264_nvenc), we - # can hand it MSS's BGRA frames directly and avoid an - # extra pre-conversion step on our side. + # If the encoder accepts BGRA input (e.g., h264_nvenc), we can hand it MSS's BGRA frames directly and + # avoid an extra pre-conversion step on our side. # - # If the encoder doesn't accept BGRA input (e.g., - # libx264), PyAV will insert a conversion step - # automatically. In that case, we let the codec choose - # the pix_fmt it's going to expect. + # If the encoder doesn't accept BGRA input (e.g., libx264), PyAV will insert a conversion step + # automatically. In that case, we let the codec choose the pix_fmt it's going to expect. # - # Note: the alpha channel is ignored by H.264. We may - # effectively be sending BGRx/BGR0. But PyAV's VideoFrame - # only exposes "bgra" as the closest supported format. + # Note: the alpha channel is ignored by H.264. We may effectively be sending BGRx/BGR0. But PyAV's + # VideoFrame only exposes "bgra" as the closest supported format. if any(f.name == "bgra" for f in video_stream.codec.video_formats): video_stream.pix_fmt = "bgra" - # We open (initialize) the codec explicitly here. PyAV - # will automatically open it the first time we call - # video_stream.encode, but the time it takes to set the - # codec up means the first frame would be particularly - # slow. + # We open (initialize) the codec explicitly here. PyAV will automatically open it the first time we + # call video_stream.encode, but the time it takes to set the codec up means the first frame would be + # particularly slow. 
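(Editor's aside: a rough, machine-dependent way to observe the warmup cost that pre-opening avoids is to time the first few encode() calls with and without the explicit open(). This sketch is not part of the demo; it assumes libx264 is available in your FFmpeg build, and it leaves the frame contents uninitialized because only the timing matters here.)

    import time

    import av

    with av.open("warmup-test.mp4", "w") as container:
        stream = container.add_stream("libx264", rate=30)
        stream.width = 1280
        stream.height = 720
        stream.pix_fmt = "yuv420p"
        stream.open()                    # comment this out to push the setup cost onto frame 0

        for i in range(5):
            frame = av.VideoFrame(1280, 720, "yuv420p")   # contents are irrelevant for timing
            frame.pts = i
            start = time.perf_counter()
            packets = stream.encode(frame)
            print(f"frame {i}: {(time.perf_counter() - start) * 1000:.1f} ms, {len(packets)} packet(s)")
            for packet in packets:
                container.mux(packet)

        for packet in stream.encode(None):   # flush the frames the encoder is still holding
            container.mux(packet)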
video_stream.open() shutdown_requested = Event() - mailbox_screenshot: Mailbox[ - tuple[mss.screenshot.ScreenShot, float] - ] = Mailbox() + mailbox_screenshot: Mailbox[tuple[mss.screenshot.ScreenShot, float]] = Mailbox() mailbox_frame: Mailbox[av.VideoFrame] = Mailbox() mailbox_packet_to_stats: Mailbox[Sequence[av.Packet]] = Mailbox() mailbox_packet_to_mux: Mailbox[Sequence[av.Packet]] = Mailbox() @@ -639,15 +539,11 @@ def main() -> None: old_sigint_handler = signal.getsignal(signal.SIGINT) def sigint_handler(_signum: int, _frame: Any) -> None: - # Restore the default behavior, so if our shutdown - # doesn't work because of a bug in our code, the user - # can still press ^C again to terminate the program. - # (The default handler is also in - # signal.default_int_handler, but that's not - # documented.) + # Restore the default behavior, so if our shutdown doesn't work because of a bug in our code, the user + # can still press ^C again to terminate the program. (The default handler is also in + # signal.default_int_handler, but that's not documented.) signal.signal(signal.SIGINT, old_sigint_handler) - # The status line will typically be visible, so start - # a fresh line for this message. + # The status line will typically be visible, so start a fresh line for this message. print("\nShutting down") shutdown_requested.set() @@ -657,10 +553,8 @@ def sigint_handler(_signum: int, _frame: Any) -> None: if duration_secs is not None: stage_video_capture.join(timeout=duration_secs) - # Either the join timed out, or we processed a ^C and - # requested it exit. Either way, it's safe to set the - # shutdown event again, and return to our normal - # processing loop. + # Either the join timed out, or we processed a ^C and requested it exit. Either way, it's safe to set + # the shutdown event again, and return to our normal processing loop. shutdown_requested.set() stage_video_capture.join() @@ -669,34 +563,24 @@ def sigint_handler(_signum: int, _frame: Any) -> None: stage_show_stats.join() stage_mux.join() - # PyAV may insert an implicit conversion step between the - # frames we provide and what the encoder actually accepts - # (pixel format, colorspace, etc.). When that happens, - # `video_stream.reformatter` gets set. + # PyAV may insert an implicit conversion step between the frames we provide and what the encoder actually + # accepts (pixel format, colorspace, etc.). When that happens, `video_stream.reformatter` gets set. # - # This is useful to know for performance: those - # conversions are typically CPU-side work and can become a - # bottleneck. Hardware-accelerated encoders, such as - # `h264_nvenc`, often accept BGRx, and can perform the + # This is useful to know for performance: those conversions are typically CPU-side work and can become a + # bottleneck. Hardware-accelerated encoders, such as `h264_nvenc`, often accept BGRx, and can perform the # conversion using specialized hardware. # - # We already know that libx264 doesn't accept RGB input, - # so we don't warn about that. (There is a libx264rgb, - # but that writes to a different H.264 format.) We just - # want to warn about other codecs, since some of them - # might have ways to use BGRx input, and the programmer - # might want to investigate. + # We already know that libx264 doesn't accept RGB input, so we don't warn about that. (There is a + # libx264rgb, but that writes to a different H.264 format.) 
We just want to warn about other codecs, + # since some of them might have ways to use BGRx input, and the programmer might want to investigate. # - # Note: `reformatter` is created lazily, so it may only be - # set after frames have been sent through the encoder, - # which is why we check it at the end. + # Note: `reformatter` is created lazily, so it may only be set after frames have been sent through the + # encoder, which is why we check it at the end. if video_stream.reformatter is not None and codec != "libx264": LOGGER.warning( - "PyAV inserted a CPU-side pixel-format/colorspace " - "conversion step; this can reduce FPS. Check the " - "acceptable pix_fmts for this codec, and see if one " - "of them can accept some variation of BGRx input " - "directly." + "PyAV inserted a CPU-side pixel-format/colorspace conversion step; this can reduce FPS. " + "Check the acceptable pix_fmts for this codec, and see if one of them can accept some " + "variation of BGRx input directly." ) From ec9cd3742830825a4cf6eea4836be49230d1dcaf Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Tue, 20 Jan 2026 09:16:31 +0000 Subject: [PATCH 14/16] Comment improvements --- demos/video-capture.py | 53 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index 098d9cb..10543ec 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -124,9 +124,10 @@ from common.pipeline import Mailbox, PipelineStage -# These are the options you'd give to ffmpeg that it sends to the video codec. The options you can use here can be -# listed with `ffmpeg -help encoder=libx264`, or whatever encoder you're using for this demo's `--codec` flag. The -# options for each encoder are described in more detail in `man ffmpeg-codecs`. +# These are the options you'd give to ffmpeg that it sends to the video codec. Because ffmpeg and PyAV both use the +# libav libraries, you can get the list of available flags with `ffmpeg -help encoder=libx264`, or whatever encoder +# you're using for this demo's `--codec` flag. The options for each encoder are described in more detail in `man +# ffmpeg-codecs`. CODEC_OPTIONS = { # The "high" profile means that the encoder can use some H.264 features that are widely supported, but not # mandatory. If you're using a codec other than H.264, you'll need to comment out this line: the relevant @@ -238,6 +239,19 @@ def video_process( frame = av.VideoFrame.from_numpy_buffer(ndarray, format="bgra") # Set the PTS and time base for the frame. + # + # We compute PTS based on the actual time we captured the screenshot, relative to when we got the first + # frame. This gives us variable frame rate (VFR) video that accurately reflects the times the frames were + # captured. + # + # However, if we were muxing in an audio stream as well, we'd want to use a common clock for both audio and + # video PTS, preferably based on the audio clock. That's because audio glitches are more noticeable than + # video glitches, so audio timing should be prioritized. In that case, the video PTS would be based on the + # audio clock, not the actual capture time. + # + # The easiest way to do that is to record the monotonic clock in both the video and audio capture stages + # (taking the audio latency into account), record the audio PTS based on how many audio samples have been + # captured, and then adjust the video PTS based on the skew between the audio and monotonic clocks. 
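(Editor's aside: the audio-clock scheme described in the comment above stays prose in this demo, which does not mux audio. A loose sketch of the idea, using hypothetical helper names on_audio_chunk() and on_video_frame() that do not appear in the demo, might look like this: audio PTS comes from the sample count, and video PTS is mapped onto that clock via the continually re-estimated epoch.)

    from fractions import Fraction

    TIME_BASE = Fraction(1, 90000)
    SAMPLE_RATE = 48000

    samples_captured = 0      # audio samples handed to the encoder so far
    audio_epoch = None        # monotonic time that corresponds to audio PTS 0


    def on_audio_chunk(num_samples: int, monotonic_now: float, latency: float) -> int:
        """Return the PTS for this chunk; audio PTS comes purely from the sample count."""
        global samples_captured, audio_epoch
        pts = int(Fraction(samples_captured, SAMPLE_RATE) / TIME_BASE)
        # The first sample of this chunk was captured about `latency` seconds ago and sits
        # samples_captured / SAMPLE_RATE seconds into the audio stream, so this re-estimates
        # where audio PTS 0 falls on the monotonic clock (tracking any skew between clocks).
        audio_epoch = (monotonic_now - latency) - samples_captured / SAMPLE_RATE
        samples_captured += num_samples
        return pts


    def on_video_frame(monotonic_now: float) -> int:
        """Return the PTS for a video frame, expressed on the audio-derived clock."""
        if audio_epoch is None:
            return 0          # no audio captured yet; a real pipeline would wait or fall back
        return int((monotonic_now - audio_epoch) / TIME_BASE)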
            if first_frame_at is None:
                first_frame_at = timestamp
            frame.pts = int((timestamp - first_frame_at) / TIME_BASE)
@@ -465,16 +479,20 @@ def main() -> None:
             video_stream.time_base = TIME_BASE
             video_stream.codec_context.time_base = TIME_BASE
             # `pix_fmt` here describes the pixel format we will *feed* into the encoder (not necessarily what the
-            # encoder will store in the bitstream). H.264 encoders ultimately convert to a YUV format internally.
+            # encoder will store in the bitstream). H.264 encoders ultimately convert to a YUV 4:2:0 format
+            # internally.
             #
-            # If the encoder accepts BGRA input (e.g., h264_nvenc), we can hand it MSS's BGRA frames directly and
-            # avoid an extra pre-conversion step on our side.
+            # If the encoder accepts BGRx input (e.g., h264_nvenc), we can hand it MSS's BGRx frames directly and
+            # avoid an extra pre-conversion step on our side. For a hardware encoder, that lets specialized hardware
+            # do the conversion to YUV efficiently.
             #
-            # If the encoder doesn't accept BGRA input (e.g., libx264), PyAV will insert a conversion step
-            # automatically. In that case, we let the codec choose the pix_fmt it's going to expect.
+            # If the encoder doesn't accept BGRx input (e.g., libx264), PyAV will insert a conversion step
+            # automatically. In that case, we let the codec choose the pix_fmt it wants.
             #
-            # Note: the alpha channel is ignored by H.264. We may effectively be sending BGRx/BGR0. But PyAV's
-            # VideoFrame only exposes "bgra" as the closest supported format.
+            # Note: the alpha channel is ignored by H.264. We are usually sending BGRx/BGR0. But PyAV's
+            # VideoFrame only exposes "bgra" as the closest supported format, so that's how we tag our frames, and
+            # what we tell the codec to expect, if possible. You might need to change this for codecs like VP9 that
+            # can handle alpha channels.
             if any(f.name == "bgra" for f in video_stream.codec.video_formats):
                 video_stream.pix_fmt = "bgra"
             # We open (initialize) the codec explicitly here. PyAV will automatically open it the first time we
@@ -536,6 +554,19 @@ def main() -> None:
             LOGGER.debug("  Encode:     %s", stage_video_encode.native_id)
             LOGGER.debug("  Mux:        %s", stage_mux.native_id)
 
+            # Handle Ctrl-C gracefully by requesting shutdown.
+            #
+            # Python always routes signals to the main thread, so we don't have to worry about another thread getting
+            # a SIGINT (the Ctrl-C signal). That's significant because if the video capture stage tried to set the
+            # shutdown_requested event (which requires the event lock) while it was already waiting for it (hence
+            # holding the lock), it could end up deadlocked. The main thread doesn't ever acquire that lock. As
+            # another point of safety, Python will only invoke our signal handler at a "safe" point, such as between
+            # bytecode instructions.
+
+            # We set old_sigint_handler twice: once here, and once when we change the handler. The first time is
+            # just in case a signal arrives in the tiny window between when we set the new handler (by calling
+            # signal.signal), and when we assign it to old_sigint_handler (with "="). Signal handling, like
+            # threading, is tricky to get right.
             old_sigint_handler = signal.getsignal(signal.SIGINT)
 
             def sigint_handler(_signum: int, _frame: Any) -> None:
@@ -552,6 +583,8 @@ def sigint_handler(_signum: int, _frame: Any) -> None:
             print("Starting video capture. Press Ctrl-C to stop.")
 
             if duration_secs is not None:
+                # Wait for up to the specified duration.
If the pipeline shuts down for other reasons (such as an + # exception), then we'll recognize it sooner with this join. stage_video_capture.join(timeout=duration_secs) # Either the join timed out, or we processed a ^C and requested it exit. Either way, it's safe to set # the shutdown event again, and return to our normal processing loop. From 6e126adf63086cfe83e694ec533a832439016499 Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Tue, 20 Jan 2026 09:26:21 +0000 Subject: [PATCH 15/16] Add docs references to demos --- CHANGELOG.md | 1 + docs/source/examples.rst | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5087f45..6841012 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ See Git checking messages for full history. - Windows: improve error checking and messages for Win32 API calls (#448) - Mac: fix memory leak (#450, #453) - improve multithreading: allow multiple threads to use the same MSS object, allow multiple MSS objects to concurrently take screenshots, and document multithreading guarantees (#446, #452) +- Add full demos for different ways to use MSS (#444, #456) - :heart: contributors: @jholveck ## 10.1.0 (2025-08-16) diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 437e0f3..7b636bb 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -209,3 +209,19 @@ Different possibilities to convert raw BGRA values to RGB:: ... .. versionadded:: 3.2.0 + + +Demos +===== + +In addition to these simple examples, there are full demos of more complex use cases in the ``demos/`` directory of the +source code. The demos are not installed with the package, but you can run them directly from the source tree after +cloning the repository. + +These are complete, working programs that use MSS for screen capture as a key part of their functionality. They show +not only how to invoke MSS, but also some of the techniques for using the captured frames efficiently, in real-world +scenarios. + +These include: +- MP4 video capture with encoding using PyAV (FFmpeg bindings) +- Live streaming to a TinyTV as MJPEG From 5c26cd158c295a18595f5a5da8abe65dbd6ffa53 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Tue, 27 Jan 2026 19:23:48 -0800 Subject: [PATCH 16/16] Clarify a few comments based on reviews --- demos/video-capture.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/demos/video-capture.py b/demos/video-capture.py index 10543ec..18ff9f0 100755 --- a/demos/video-capture.py +++ b/demos/video-capture.py @@ -49,7 +49,8 @@ # Timestamps (PTS/DTS) # -------------------- # -# Every frame has a *presentation timestamp* (PTS): when the viewer should see it. +# Every frame has a *presentation timestamp* (PTS): when the viewer should see it. (See the next section for how +# these are represented.) # # Encoders may output packets in a different order due to B-frames. Those packets also have a *decode timestamp* # (DTS): when the decoder must decode them so the PTS schedule can be met. @@ -73,8 +74,12 @@ # Many video files run at a fixed frame rate, like 30 fps. Each frame is shown at 1/30 sec intervals. This is called # *constant frame rate*, or *CFR*, and that's what we do in the simple version of this demo. # -# One problem with this is that, if the encoder can't keep up, the video will appear sped-up when played back. The -# comments at the beginning of the simple version of this demo go into more detail about that problem. 
+# Applications using CFR usually set the time base to the frame rate, such as 1/30 sec. This lets them just use the
+# frame number for the PTS.
+#
+# One problem with real-time recording to CFR is that, if the encoder can't keep up, the video will appear sped-up
+# when played back. The comments at the beginning of the simple version of this demo go into more detail about that
+# problem.
 #
 # In this advanced version, we use *variable frame rate*, or *VFR*. That's because we can't be sure that the encoder
 # will be able to work fast enough: we haven't tuned its settings for your screen resolution and hardware. While the