diff --git a/scenedetect.cfg b/scenedetect.cfg index fd6241cd..6ea4fb6f 100644 --- a/scenedetect.cfg +++ b/scenedetect.cfg @@ -227,9 +227,13 @@ # Compression amount for png images (0 to 9). Only affects size, not quality. #compression = 3 -# Number of frames to ignore around each scene cut when selecting frames. +# [DEPRECATED] Number of frames to ignore around each scene cut when selecting frames. +# TODO(v0.7): Remove this and add backwards compatibility helpers. #frame-margin = 1 +# Amount of time to ignore at the beginning/end of a shot when selecting frames. +#margin = 0.04s + # Resize by scale factor (0.5 = half, 1.0 = same, 2.0 = double). #scale = 1.0 diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py index a0c639d2..010ac310 100644 --- a/scenedetect/_cli/__init__.py +++ b/scenedetect/_cli/__init__.py @@ -38,6 +38,7 @@ ) from scenedetect._cli.context import USER_CONFIG, CliContext, check_split_video_requirements from scenedetect.backends import AVAILABLE_BACKENDS +from scenedetect.common import FrameTimecode from scenedetect.detectors import ( AdaptiveDetector, ContentDetector, @@ -1400,8 +1401,17 @@ def split_video_command( metavar="N", default=None, type=click.INT, - help="Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries.%s" - % (USER_CONFIG.get_help_string("save-images", "num-images")), + help="[DEPRECATED] Use --margin instead. Number of frames to ignore at beginning/end of scenes when saving images.%s" + % (USER_CONFIG.get_help_string("save-images", "frame-margin")), +) +@click.option( + "-M", + "--margin", + metavar="TIME", + default=None, + type=click.STRING, + help="Amount of time to ignore at the beginning/end of each scene. Discards frame-margin if set. 
Can be specified as seconds (0.1), frames (3), or timecode (00:00:00.100).%s" + % (USER_CONFIG.get_help_string("save-images", "margin")), ) @click.option( "--scale", @@ -1442,6 +1452,7 @@ def save_images_command( png: bool = False, compression: ty.Optional[int] = None, frame_margin: ty.Optional[int] = None, + margin: ty.Optional[str] = None, scale: ty.Optional[float] = None, height: ty.Optional[int] = None, width: ty.Optional[int] = None, @@ -1487,6 +1498,12 @@ def save_images_command( raise click.BadParameter("\n".join(error_strs), param_hint="save-images") output = ctx.config.get_value("save-images", "output", output) + # Get margin value (from CLI arg or config), converting to FrameTimecode + margin_value = ctx.config.get_value("save-images", "margin", margin) + margin_tc = None + if margin_value is not None: + margin_tc = FrameTimecode(timecode=margin_value, fps=ctx.video_stream.frame_rate) + save_images_args = { "encoder_param": compression if png else quality, "frame_margin": ctx.config.get_value("save-images", "frame-margin", frame_margin), @@ -1498,6 +1515,7 @@ def save_images_command( "output": output, "scale": scale, "show_progress": not ctx.quiet_mode, + "margin": margin_tc, "threading": ctx.config.get_value("save-images", "threading"), "width": width, } diff --git a/scenedetect/_cli/commands.py b/scenedetect/_cli/commands.py index 6813caa8..cde14e7e 100644 --- a/scenedetect/_cli/commands.py +++ b/scenedetect/_cli/commands.py @@ -191,6 +191,7 @@ def save_images( width: int, interpolation: Interpolation, threading: bool, + margin: ty.Optional["FrameTimecode"] = None, ): """Handles the `save-images` command.""" del cuts # save-images only uses scenes. @@ -210,6 +211,7 @@ def save_images( width=width, interpolation=interpolation, threading=threading, + margin=margin, ) # Save the result for use by `save-html` if required. 
def __mul__(self, factor: ty.Union[int, float]) -> "FrameTimecode":
    """Multiply this timecode by a scalar and return the result as a new FrameTimecode.

    Arguments:
        factor: Scalar (int or float) to scale the timecode by.

    Returns:
        A new FrameTimecode; `self` is not modified. Results that would be negative are
        clamped to zero. Returns `NotImplemented` if `factor` is not an int or float so
        Python can try the reflected operation or raise `TypeError`.
    """
    if not isinstance(factor, (int, float)):
        return NotImplemented
    return self._scaled(lambda value: value * factor)

def __rmul__(self, factor: ty.Union[int, float]) -> "FrameTimecode":
    """Reflected multiplication (`factor * timecode`); identical to `timecode * factor`."""
    return self.__mul__(factor)

def __truediv__(self, divisor: ty.Union[int, float]) -> "FrameTimecode":
    """Divide this timecode by a scalar and return the result as a new FrameTimecode.

    Arguments:
        divisor: Non-zero scalar (int or float) to divide the timecode by.

    Returns:
        A new FrameTimecode; `self` is not modified. Results that would be negative are
        clamped to zero. Returns `NotImplemented` if `divisor` is not an int or float.

    Raises:
        ZeroDivisionError: If `divisor` is zero.
    """
    if not isinstance(divisor, (int, float)):
        return NotImplemented
    if divisor == 0:
        raise ZeroDivisionError("Cannot divide FrameTimecode by zero")
    return self._scaled(lambda value: value / divisor)

def _scaled(
    self, scale: ty.Callable[[ty.Union[int, float]], ty.Union[int, float]]
) -> "FrameTimecode":
    """Return a copy of this timecode with `scale` applied to its underlying value.

    Shared by `__mul__` and `__truediv__` so both operators treat every internal
    representation the same way:

      - `Timecode`: `pts` is scaled, rounded to the nearest integer and clamped at 0;
        the `time_base` is kept as-is.
      - `_Seconds`: the value is scaled and clamped at 0.0 (no rounding).
      - anything else (frame number): the value is scaled, rounded to the nearest
        integer and clamped at 0.
    """
    # Work on a copy so the operand itself is never mutated.
    result = FrameTimecode(timecode=self)
    current = result._time
    if isinstance(current, Timecode):
        result._time = Timecode(
            pts=max(0, round(scale(current.pts))),
            time_base=current.time_base,
        )
    elif isinstance(current, _Seconds):
        result._time = _Seconds(max(0.0, scale(current.value)))
    else:
        result._time = _FrameNumber(max(0, round(scale(current.value))))
    return result
def _generate_scene_timecodes(
    self, start: FrameTimecode, end: FrameTimecode
) -> ty.Iterable[FrameTimecode]:
    """Generate the timecodes of the images to extract from a single scene.

    The margin is applied to both ends of the scene to get the usable time range, and
    `self._num_images` timecodes are then spread evenly across that range using
    timecode arithmetic (not frame indices).

    Arguments:
        start: Start of the scene.
        end: End of the scene.

    Yields:
        `self._num_images` timecodes in ascending order. A single image is taken from
        the middle of the usable range; otherwise the first and last images sit exactly
        on the range limits. If the scene is shorter than twice the margin, every image
        is taken from the middle of the scene.
    """
    if self._margin is not None:
        # Time-based margin always wins over the deprecated frame count.
        margin = self._margin
    else:
        # Deprecated path: convert the frame count into a timecode. A missing (None)
        # or negative frame margin is treated as no margin.
        margin = FrameTimecode(max(0, self._frame_margin or 0), fps=start.framerate)

    # Usable range, clamped so neither limit leaves the scene.
    # NOTE(review): with a zero margin the last image lands exactly on `end`. The
    # previous frame-range implementation stopped at `end - 1`; if scene ends are
    # exclusive, `end` is the first frame of the next scene - confirm this is intended.
    first_time = min(start + margin, end)
    last_time = max(end - margin, start)

    # Margins overlap (scene shorter than 2x margin): collapse onto the scene middle.
    if first_time > last_time:
        first_time = last_time = start + (end - start) / 2

    if self._num_images == 1:
        yield first_time + (last_time - first_time) / 2
        return

    duration = last_time - first_time
    final_index = self._num_images - 1
    for index in range(self._num_images):
        if index == 0:
            yield first_time
        elif index == final_index:
            # Yield the limit itself rather than first_time + duration so the last
            # image is not affected by rounding.
            yield last_time
        else:
            yield first_time + duration * (index / final_index)

def generate_timecode_list(self, scene_list: SceneList) -> ty.List[ty.Iterable[FrameTimecode]]:
    """Generates a list of timecodes for each scene in `scene_list` based on the current config
    parameters.

    Each entry is a lazy iterable for the scene at the same index; it can be consumed only
    once, so wrap it in `list()` if the timecodes are needed more than once."""
    return [self._generate_scene_timecodes(start, end) for start, end in scene_list]
threading: Offload image encoding and disk IO to background threads to improve performance. + margin: Amount of time to pad each scene around the beginning and end. Takes + precedence over `frame_margin` when set. Can be created from seconds (float), frames + (int), or timecode string. Returns: Dictionary of the format { scene_num : [image_paths] }, where scene_num is the @@ -432,6 +449,7 @@ def save_images( height, width, interpolation, + margin, ) return extractor.run(video, scene_list, output_dir, show_progress) @@ -451,45 +469,13 @@ def save_images( image_num_format = "%0" image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + "d" - framerate = scene_list[0][0]._rate - - # TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly. - timecode_list = [ - [ - FrameTimecode(int(f), fps=framerate) - for f in ( - # middle frames - a[len(a) // 2] - if (0 < j < num_images - 1) or num_images == 1 - # first frame - else min(a[0] + frame_margin, a[-1]) - if j == 0 - # last frame - else max(a[-1] - frame_margin, a[0]) - # for each evenly-split array of frames in the scene list - for j, a in enumerate(np.array_split(r, num_images)) - ) - ] - for i, r in enumerate( - [ - # pad ranges to number of images - r if 1 + r[-1] - r[0] >= num_images else list(r) + [r[-1]] * (num_images - len(r)) - # create range of frames in scene - for r in ( - range( - start.frame_num, - start.frame_num - + max( - 1, # guard against zero length scenes - end.frame_num - start.frame_num, - ), - ) - # for each scene in scene list - for start, end in scene_list - ) - ] - ) - ] + # Use _ImageExtractor to generate timecodes (shares logic with threaded path) + extractor = _ImageExtractor( + num_images=num_images, + frame_margin=frame_margin, + margin=margin, + ) + timecode_list = [list(tc) for tc in extractor.generate_timecode_list(scene_list)] image_filenames = {i: [] for i in range(len(timecode_list))} aspect_ratio = video.aspect_ratio diff --git 
class TestImageExtractorMargin:
    """Tests for _ImageExtractor margin functionality using PTS-based selection."""

    @staticmethod
    def _select(scene, **extractor_kwargs):
        """Build an `_ImageExtractor` from `extractor_kwargs` and return, as a list, the
        timecodes it selects for the single `(start, end)` pair `scene`."""
        # Imported inside the helper, mirroring the function-local imports these tests use.
        from scenedetect.output.image import _ImageExtractor

        picker = _ImageExtractor(**extractor_kwargs)
        per_scene = picker.generate_timecode_list([scene])
        return list(per_scene[0])

    def test_margin_uses_seconds_not_frames(self):
        """Margin is measured in presentation time rather than in frames.

        For a 0s-3s scene at 30fps with a 0.1s margin, the first image is expected
        around 0.1s (frame 3) and the last around 2.9s (frame 87).
        """
        # Frames 0 to 90 == 0s to 3s at 30fps.
        scene = (FrameTimecode(0, fps=30.0), FrameTimecode(90, fps=30.0))
        picks = self._select(scene, num_images=3, margin=FrameTimecode(timecode=0.1, fps=30.0))

        # start + margin = 0.1s
        assert picks[0].seconds == pytest.approx(0.1, abs=0.05)
        # roughly the middle of the scene
        assert picks[1].seconds == pytest.approx(1.5, abs=0.1)
        # end - margin = 2.9s
        assert picks[2].seconds == pytest.approx(2.9, abs=0.05)

    def test_margin_different_framerates(self):
        """A 0.1s margin gives the same time offset at every framerate.

        The offset in frames differs per framerate, but the offset in seconds must not.
        """
        for fps in [24.0, 25.0, 30.0, 60.0]:
            # Three second scene at the current framerate.
            scene = (FrameTimecode(0, fps=fps), FrameTimecode(int(3 * fps), fps=fps))
            picks = self._select(scene, num_images=3, margin=FrameTimecode(timecode=0.1, fps=fps))

            assert picks[0].seconds == pytest.approx(0.1, abs=0.05), f"Failed at {fps}fps"
            assert picks[2].seconds == pytest.approx(2.9, abs=0.05), f"Failed at {fps}fps"

    def test_margin_clamped_to_scene_bounds(self):
        """Selections stay inside the scene when it is shorter than twice the margin."""
        # 0.5s scene (frames 0 to 15 at 30fps) with a 0.5s margin.
        scene = (FrameTimecode(0, fps=30.0), FrameTimecode(15, fps=30.0))
        picks = self._select(scene, num_images=3, margin=FrameTimecode(timecode=0.5, fps=30.0))

        for tc in picks:
            assert 0.0 <= tc.seconds <= 0.5

    def test_margin_zero(self):
        """A zero margin places the first/last images at the scene boundaries."""
        scene = (FrameTimecode(30, fps=30.0), FrameTimecode(90, fps=30.0))
        picks = self._select(scene, num_images=3, margin=FrameTimecode(timecode=0.0, fps=30.0))

        # Scene starts at 1s.
        assert picks[0].seconds == pytest.approx(1.0, abs=0.05)
        # Scene ends at 3s; the last image is expected close to it.
        assert picks[2].seconds == pytest.approx(2.97, abs=0.1)

    def test_margin_with_pts_timecodes(self):
        """Margin arithmetic works when scene and margin are PTS-based FrameTimecodes.

        PTS (Presentation Time Stamp) timecodes carry a time_base instead of relying on
        a fixed framerate; here everything is expressed in milliseconds.
        """
        from fractions import Fraction

        from scenedetect.common import Timecode

        millis = Fraction(1, 1000)

        def at(pts):
            return FrameTimecode(timecode=Timecode(pts=pts, time_base=millis), fps=30.0)

        # 3 second scene (0ms to 3000ms) with a 100ms margin.
        picks = self._select((at(0), at(3000)), num_images=3, margin=at(100))

        assert picks[0].seconds == pytest.approx(0.1, abs=0.05)
        assert picks[1].seconds == pytest.approx(1.5, abs=0.1)
        assert picks[2].seconds == pytest.approx(2.9, abs=0.05)

    def test_margin_pts_preserves_time_base(self):
        """First/last selections are correct for a scene in a 1/90000 time base.

        Only the resulting times in seconds are asserted here; the time base of the
        returned timecodes is not inspected.
        """
        from fractions import Fraction

        from scenedetect.common import Timecode

        mpeg_ts = Fraction(1, 90000)  # Common time_base for MPEG-TS

        def at(pts):
            return FrameTimecode(timecode=Timecode(pts=pts, time_base=mpeg_ts), fps=30.0)

        # 2 second scene (pts 0 to 180000) with a 0.1s margin (9000 pts).
        picks = self._select((at(0), at(180000)), num_images=2, margin=at(9000))

        assert picks[0].seconds == pytest.approx(0.1, abs=0.01)
        assert picks[1].seconds == pytest.approx(1.9, abs=0.01)

    def test_frame_margin_backwards_compatibility(self):
        """`frame_margin` alone still applies when `margin` is not given."""
        # 3 frames at 30fps == 0.1s; scene is frames 0 to 90 (0s to 3s).
        scene = (FrameTimecode(0, fps=30.0), FrameTimecode(90, fps=30.0))
        picks = self._select(scene, num_images=3, frame_margin=3)

        assert picks[0].seconds == pytest.approx(0.1, abs=0.05)
        assert picks[1].seconds == pytest.approx(1.5, abs=0.1)
        assert picks[2].seconds == pytest.approx(2.9, abs=0.05)

    def test_margin_overrides_frame_margin(self):
        """`margin` takes precedence over `frame_margin` when both are given."""
        scene = (FrameTimecode(0, fps=30.0), FrameTimecode(90, fps=30.0))
        picks = self._select(
            scene,
            num_images=3,
            frame_margin=30,  # Would be 1s at 30fps
            margin=FrameTimecode(timecode=0.1, fps=30.0),  # 0.1s
        )

        # Offsets match the 0.1s margin, not the 1s frame_margin.
        assert picks[0].seconds == pytest.approx(0.1, abs=0.05)
        assert picks[2].seconds == pytest.approx(2.9, abs=0.05)