Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion scenedetect.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,13 @@
# Compression amount for png images (0 to 9). Only affects size, not quality.
#compression = 3

# Number of frames to ignore around each scene cut when selecting frames.
# [DEPRECATED] Use `margin` instead. Number of frames to ignore around each scene cut when selecting frames.
# TODO(v0.7): Remove this and add backwards compatibility helpers.
#frame-margin = 1

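# Amount of time to ignore at the beginning/end of each scene when selecting frames.
# Can be given as seconds (e.g. 0.04s), a number of frames (e.g. 1), or a timecode
# (e.g. 00:00:00.040).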
#margin = 0.04s

# Resize by scale factor (0.5 = half, 1.0 = same, 2.0 = double).
#scale = 1.0

Expand Down
22 changes: 20 additions & 2 deletions scenedetect/_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
)
from scenedetect._cli.context import USER_CONFIG, CliContext, check_split_video_requirements
from scenedetect.backends import AVAILABLE_BACKENDS
from scenedetect.common import FrameTimecode
from scenedetect.detectors import (
AdaptiveDetector,
ContentDetector,
Expand Down Expand Up @@ -1400,8 +1401,17 @@ def split_video_command(
metavar="N",
default=None,
type=click.INT,
help="Number of frames to ignore at beginning/end of scenes when saving images. Controls temporal padding on scene boundaries.%s"
% (USER_CONFIG.get_help_string("save-images", "num-images")),
help="[DEPRECATED] Use --margin instead. Number of frames to ignore at beginning/end of scenes when saving images.%s"
% (USER_CONFIG.get_help_string("save-images", "frame-margin")),
)
@click.option(
"-M",
"--margin",
metavar="TIME",
default=None,
type=click.STRING,
help="Amount of time to ignore at the beginning/end of each scene. Discards frame-margin if set. Can be specified as seconds (0.1), frames (3), or timecode (00:00:00.100).%s"
% (USER_CONFIG.get_help_string("save-images", "margin")),
)
@click.option(
"--scale",
Expand Down Expand Up @@ -1442,6 +1452,7 @@ def save_images_command(
png: bool = False,
compression: ty.Optional[int] = None,
frame_margin: ty.Optional[int] = None,
margin: ty.Optional[str] = None,
scale: ty.Optional[float] = None,
height: ty.Optional[int] = None,
width: ty.Optional[int] = None,
Expand Down Expand Up @@ -1487,6 +1498,12 @@ def save_images_command(
raise click.BadParameter("\n".join(error_strs), param_hint="save-images")
output = ctx.config.get_value("save-images", "output", output)

# Get margin value (from CLI arg or config), converting to FrameTimecode
margin_value = ctx.config.get_value("save-images", "margin", margin)
margin_tc = None
if margin_value is not None:
margin_tc = FrameTimecode(timecode=margin_value, fps=ctx.video_stream.frame_rate)

save_images_args = {
"encoder_param": compression if png else quality,
"frame_margin": ctx.config.get_value("save-images", "frame-margin", frame_margin),
Expand All @@ -1498,6 +1515,7 @@ def save_images_command(
"output": output,
"scale": scale,
"show_progress": not ctx.quiet_mode,
"margin": margin_tc,
"threading": ctx.config.get_value("save-images", "threading"),
"width": width,
}
Expand Down
2 changes: 2 additions & 0 deletions scenedetect/_cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def save_images(
width: int,
interpolation: Interpolation,
threading: bool,
margin: ty.Optional["FrameTimecode"] = None,
):
"""Handles the `save-images` command."""
del cuts # save-images only uses scenes.
Expand All @@ -210,6 +211,7 @@ def save_images(
width=width,
interpolation=interpolation,
threading=threading,
margin=margin,
)
# Save the result for use by `save-html` if required.
context.save_images_result = (images, output)
Expand Down
1 change: 1 addition & 0 deletions scenedetect/_cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,7 @@ class XmlFormat(Enum):
"quality": RangeValue(_PLACEHOLDER, min_val=0, max_val=100),
"scale": 1.0,
"scale-method": Interpolation.LINEAR,
"margin": TimecodeValue("0.04s"),
"threading": True,
"width": 0,
},
Expand Down
38 changes: 38 additions & 0 deletions scenedetect/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,44 @@ def __sub__(self, other: ty.Union[int, float, str, "FrameTimecode"]) -> "FrameTi
to_return -= other
return to_return

def __mul__(self, factor: ty.Union[int, float]) -> "FrameTimecode":
    """Return a new FrameTimecode equal to this one scaled by `factor`.

    The scaling is applied to whichever internal representation this timecode
    holds (a `Timecode` pts value, a `_Seconds` value, or a `_FrameNumber`
    value). Integer-valued representations are rounded with `round()`, and every
    result is clamped so it never goes below zero.

    Arguments:
        factor: Scalar to multiply by. Must be an `int` or `float`.

    Returns:
        A new FrameTimecode, or `NotImplemented` when `factor` is not numeric.
    """
    if isinstance(factor, (int, float)):
        # Work on a copy so that `self` is left untouched.
        scaled = FrameTimecode(timecode=self)
        current = scaled._time
        if isinstance(current, Timecode):
            # Scale the pts only; the time base is carried over unchanged.
            new_pts = max(0, round(current.pts * factor))
            scaled._time = Timecode(pts=new_pts, time_base=current.time_base)
        elif isinstance(current, _Seconds):
            scaled._time = _Seconds(max(0.0, current.value * factor))
        else:
            scaled._time = _FrameNumber(max(0, round(current.value * factor)))
        return scaled
    return NotImplemented

def __rmul__(self, factor: ty.Union[int, float]) -> "FrameTimecode":
    """Handle `factor * timecode` by reusing the forward multiplication.

    Arguments:
        factor: Scalar on the left-hand side of the `*` operator.

    Returns:
        Whatever `__mul__` returns for the same `factor`.
    """
    # Scalar multiplication does not depend on operand order, so defer to __mul__.
    product = self.__mul__(factor)
    return product

def __truediv__(self, divisor: ty.Union[int, float]) -> "FrameTimecode":
    """Return a new FrameTimecode equal to this one divided by `divisor`.

    The division is applied to whichever internal representation this timecode
    holds (a `Timecode` pts value, a `_Seconds` value, or a `_FrameNumber`
    value). Integer-valued representations are rounded with `round()`, and every
    result is clamped so it never goes below zero.

    Arguments:
        divisor: Scalar to divide by. Must be a non-zero `int` or `float`.

    Returns:
        A new FrameTimecode, or `NotImplemented` when `divisor` is not numeric.

    Raises:
        ZeroDivisionError: If `divisor` is zero.
    """
    if not isinstance(divisor, (int, float)):
        return NotImplemented
    if divisor == 0:
        raise ZeroDivisionError("Cannot divide FrameTimecode by zero")

    # Work on a copy so that `self` is left untouched.
    quotient = FrameTimecode(timecode=self)
    current = quotient._time
    if isinstance(current, Timecode):
        # Divide the pts only; the time base is carried over unchanged.
        new_pts = max(0, round(current.pts / divisor))
        quotient._time = Timecode(pts=new_pts, time_base=current.time_base)
    elif isinstance(current, _Seconds):
        quotient._time = _Seconds(max(0.0, current.value / divisor))
    else:
        quotient._time = _FrameNumber(max(0, round(current.value / divisor)))
    return quotient

# TODO(v1.0): __int__ and __float__ should be removed. Mark as deprecated, and indicate
# need to use relevant property instead.

Expand Down
148 changes: 67 additions & 81 deletions scenedetect/output/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def __init__(
height: ty.Optional[int] = None,
width: ty.Optional[int] = None,
interpolation: Interpolation = Interpolation.CUBIC,
margin: ty.Optional[FrameTimecode] = None,
):
"""Multi-threaded implementation of save-images functionality. Uses background threads to
handle image encoding and saving images to disk to improve parallelism.
Expand All @@ -85,10 +86,10 @@ def __init__(

Arguments:
num_images: Number of images to generate for each scene. Minimum is 1.
frame_margin: Number of frames to pad each scene around the beginning
frame_margin: [DEPRECATED] Number of frames to pad each scene around the beginning
and end (e.g. moves the first/last image into the scene by N frames).
Can set to 0, but will result in some video files failing to extract
the very last frame.
the very last frame. Use `margin` instead.
image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
encoder_param: Quality/compression efficiency, based on type of image:
'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp.
Expand All @@ -109,9 +110,14 @@ def __init__(
Specifying only width will rescale the image to that number of pixels wide
while preserving the aspect ratio.
interpolation: Type of interpolation to use when resizing images.
margin: Amount of time to ignore at the beginning/end of a scene when
selecting frames. Can be specified as frames (int), seconds (float), or timecode
string when creating the FrameTimecode. Uses presentation time (PTS) for selection.
When set, takes precedence over `frame_margin`.
"""
self._num_images = num_images
self._frame_margin = frame_margin
self._margin = margin
self._image_extension = image_extension
self._image_name_template = image_name_template
self._scale = scale
Expand Down Expand Up @@ -290,48 +296,55 @@ def image_save_thread(self, save_queue: queue.Queue, progress_bar: tqdm):
if progress_bar is not None:
progress_bar.update(1)

def _generate_scene_timecodes(
    self, start: FrameTimecode, end: FrameTimecode
) -> ty.Iterable[FrameTimecode]:
    """Yield the timecodes of the images to extract from one scene.

    The scene's `[start, end]` range is first shrunk on both sides by a margin,
    then `self._num_images` timecodes are spread across what remains.

    Arguments:
        start: Timecode at which the scene begins.
        end: Timecode at which the scene ends.

    Yields:
        One timecode per image: the midpoint of the shrunken range when a single
        image is requested; otherwise the first edge, any evenly spaced interior
        points, and the last edge.
    """
    # An explicit time margin wins; otherwise use the legacy frame count, treating
    # any non-positive count as "no margin".
    if self._margin is not None:
        pad = self._margin
    else:
        pad = FrameTimecode(
            self._frame_margin if self._frame_margin > 0 else 0, fps=start.framerate
        )

    # Move both edges inwards, never past the opposite scene boundary.
    # NOTE(review): with a zero margin the last timecode equals `end` itself -
    # confirm whether `end` is meant to be an inclusive bound here.
    lower = min(start + pad, end)
    upper = max(end - pad, start)

    # The margins cross when the scene is shorter than twice the margin; collapse
    # the range onto the middle of the scene so every image uses that point.
    if lower > upper:
        lower = upper = start + (end - start) / 2

    count = self._num_images
    if count == 1:
        yield lower + (upper - lower) / 2
        return
    if count == 2:
        yield lower
        yield upper
        return

    span = upper - lower
    final_index = count - 1
    for index in range(count):
        if 0 < index < final_index:
            # Interior images sit at equal fractions of the usable span.
            yield lower + span * (index / final_index)
        elif index == 0:
            yield lower
        else:
            yield upper

def generate_timecode_list(self, scene_list: SceneList) -> ty.List[ty.Iterable[FrameTimecode]]:
    """Generate the timecodes of the images to extract for every scene in `scene_list`.

    Each scene is handed to `_generate_scene_timecodes`, which is the only place
    that applies the configured margin and image count. The earlier frame-number
    based computation that used to sit here returned before that delegation could
    run and failed on an empty scene list, so it has been dropped.

    Arguments:
        scene_list: Iterable of `(start, end)` pairs, one per scene.

    Returns:
        A list with one entry per scene, in the same order as `scene_list`. Each
        entry is the iterable returned by `_generate_scene_timecodes` for that
        scene. The list is empty when `scene_list` is empty.
    """
    return [self._generate_scene_timecodes(start, end) for start, end in scene_list]

def resize_image(
self,
Expand All @@ -358,6 +371,7 @@ def save_images(
width: ty.Optional[int] = None,
interpolation: Interpolation = Interpolation.CUBIC,
threading: bool = True,
margin: ty.Optional[FrameTimecode] = None,
) -> ty.Dict[int, ty.List[str]]:
"""Save a set number of images from each scene, given a list of scenes
and the associated video/frame source.
Expand All @@ -371,7 +385,7 @@ def save_images(
frame_margin: Number of frames to pad each scene around the beginning
and end (e.g. moves the first/last image into the scene by N frames).
Can set to 0, but will result in some video files failing to extract
the very last frame.
the very last frame. Discarded if `margin` is set.
image_extension: Type of image to save (must be one of 'jpg', 'png', or 'webp').
encoder_param: Quality/compression efficiency, based on type of image:
'jpg' / 'webp': Quality 0-100, higher is better quality. 100 is lossless for webp.
Expand All @@ -396,6 +410,9 @@ def save_images(
while preserving the aspect ratio.
interpolation: Type of interpolation to use when resizing images.
threading: Offload image encoding and disk IO to background threads to improve performance.
margin: Amount of time to pad each scene around the beginning and end. Takes
precedence over `frame_margin` when set. Can be created from seconds (float), frames
(int), or timecode string.

Returns:
Dictionary of the format { scene_num : [image_paths] }, where scene_num is the
Expand Down Expand Up @@ -432,6 +449,7 @@ def save_images(
height,
width,
interpolation,
margin,
)
return extractor.run(video, scene_list, output_dir, show_progress)

Expand All @@ -451,45 +469,13 @@ def save_images(
image_num_format = "%0"
image_num_format += str(math.floor(math.log(num_images, 10)) + 2) + "d"

framerate = scene_list[0][0]._rate

# TODO(v1.0): Split up into multiple sub-expressions so auto-formatter works correctly.
timecode_list = [
[
FrameTimecode(int(f), fps=framerate)
for f in (
# middle frames
a[len(a) // 2]
if (0 < j < num_images - 1) or num_images == 1
# first frame
else min(a[0] + frame_margin, a[-1])
if j == 0
# last frame
else max(a[-1] - frame_margin, a[0])
# for each evenly-split array of frames in the scene list
for j, a in enumerate(np.array_split(r, num_images))
)
]
for i, r in enumerate(
[
# pad ranges to number of images
r if 1 + r[-1] - r[0] >= num_images else list(r) + [r[-1]] * (num_images - len(r))
# create range of frames in scene
for r in (
range(
start.frame_num,
start.frame_num
+ max(
1, # guard against zero length scenes
end.frame_num - start.frame_num,
),
)
# for each scene in scene list
for start, end in scene_list
)
]
)
]
# Use _ImageExtractor to generate timecodes (shares logic with threaded path)
extractor = _ImageExtractor(
num_images=num_images,
frame_margin=frame_margin,
margin=margin,
)
timecode_list = [list(tc) for tc in extractor.generate_timecode_list(scene_list)]

image_filenames = {i: [] for i in range(len(timecode_list))}
aspect_ratio = video.aspect_ratio
Expand Down
Loading
Loading