Skip to content

Commit eef0ade

Browse files
committed
[scene_manager] Add ability to crop input
1 parent 3fb8d8a commit eef0ade

File tree

9 files changed

+150
-23
lines changed

9 files changed

+150
-23
lines changed

docs/cli.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ Options
5656

5757
Path to config file. See :ref:`config file reference <scenedetect_cli-config_file>` for details.
5858

59+
.. option:: --crop X0 Y0 X1 Y1
60+
61+
Crop input video. Specified as two points representing the top-left and bottom-right corners of the crop region. 0 0 is the top-left of the video frame. Bounds are inclusive (e.g. for a 100x100 video, the region covering the whole frame is 0 0 99 99).
62+
5963
.. option:: -s CSV, --stats CSV
6064

6165
Stats file (.csv) to write frame metrics. Existing files will be overwritten. Used for tuning detection parameters and data analysis.

scenedetect.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
# Video backend interface, must be one of: opencv, pyav.
3333
#backend = opencv
3434

35+
# Crop input video to area. Specified as two points in the form X0 Y0 X1 Y1 or
36+
# as (X0 Y0), (X1 Y1). Coordinate (0, 0) is the top-left corner. Bounds are inclusive.
37+
#crop = 100 100 200 250
38+
3539
# Downscale frame using a ratio of N. Set to 1 for no downscaling. If unset,
3640
# applied automatically based on input video resolution. Must be an integer value.
3741
#downscale = 1

scenedetect/_cli/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,14 @@ def _print_command_help(ctx: click.Context, command: click.Command):
228228
help="Backend to use for video input. Backend options can be set using a config file (-c/--config). [available: %s]%s"
229229
% (", ".join(AVAILABLE_BACKENDS.keys()), USER_CONFIG.get_help_string("global", "backend")),
230230
)
231+
@click.option(
232+
"--crop",
233+
metavar="X0 Y0 X1 Y1",
234+
type=(int, int, int, int),
235+
default=None,
236+
help="Crop input video. Specified as two points representing top left and bottom right corner of crop region. 0 0 is top-left of the video frame. Bounds are inclusive (e.g. for a 100x100 video, the region covering the whole frame is 0 0 99 99).%s"
237+
% (USER_CONFIG.get_help_string("global", "crop", show_default=False)),
238+
)
231239
@click.option(
232240
"--downscale",
233241
"-d",
@@ -284,6 +292,7 @@ def scenedetect(
284292
drop_short_scenes: ty.Optional[bool],
285293
merge_last_scene: ty.Optional[bool],
286294
backend: ty.Optional[str],
295+
crop: ty.Optional[ty.Tuple[int, int, int, int]],
287296
downscale: ty.Optional[int],
288297
frame_skip: ty.Optional[int],
289298
verbosity: ty.Optional[str],
@@ -324,12 +333,13 @@ def scenedetect(
324333
output=output,
325334
framerate=framerate,
326335
stats_file=stats,
327-
downscale=downscale,
328336
frame_skip=frame_skip,
329337
min_scene_len=min_scene_len,
330338
drop_short_scenes=drop_short_scenes,
331339
merge_last_scene=merge_last_scene,
332340
backend=backend,
341+
crop=crop,
342+
downscale=downscale,
333343
quiet=quiet,
334344
logfile=logfile,
335345
config=config,

scenedetect/_cli/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,12 @@ def format(self, timecode: FrameTimecode) -> str:
313313
},
314314
"global": {
315315
"backend": "opencv",
316+
#
317+
#
318+
# FIXME: This should be a tuple of 4 valid ints similar to ScoreWeightsValue.
319+
#
320+
#
321+
"crop": None,
316322
"default-detector": "detect-adaptive",
317323
"downscale": 0,
318324
"downscale-method": Interpolation.LINEAR,

scenedetect/_cli/context.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,13 @@ def handle_options(
157157
output: ty.Optional[ty.AnyStr],
158158
framerate: float,
159159
stats_file: ty.Optional[ty.AnyStr],
160-
downscale: ty.Optional[int],
161160
frame_skip: int,
162161
min_scene_len: str,
163162
drop_short_scenes: ty.Optional[bool],
164163
merge_last_scene: ty.Optional[bool],
165164
backend: ty.Optional[str],
165+
crop: ty.Optional[ty.Tuple[int, int, int, int]],
166+
downscale: ty.Optional[int],
166167
quiet: bool,
167168
logfile: ty.Optional[ty.AnyStr],
168169
config: ty.Optional[ty.AnyStr],
@@ -287,6 +288,7 @@ def handle_options(
287288
logger.debug(str(ex))
288289
raise click.BadParameter(str(ex), param_hint="downscale factor") from None
289290
scene_manager.interpolation = self.config.get_value("global", "downscale-method")
291+
scene_manager.crop = self.config.get_value("global", "crop", crop)
290292

291293
self.scene_manager = scene_manager
292294

@@ -545,7 +547,12 @@ def _open_video_stream(
545547
framerate=framerate,
546548
backend=backend,
547549
)
548-
logger.debug("Video opened using backend %s", type(self.video_stream).__name__)
550+
logger.debug(f"""Video information:
551+
Backend: {type(self.video_stream).__name__}
552+
Resolution: {self.video_stream.frame_size}
553+
Framerate: {self.video_stream.frame_rate}
554+
Duration: {self.video_stream.duration} ({self.video_stream.duration.frame_num} frames)""")
555+
549556
except FrameRateUnavailable as ex:
550557
raise click.BadParameter(
551558
"Failed to obtain framerate for input video. Manually specify framerate with the"

scenedetect/scene_manager.py

Lines changed: 73 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ def on_new_scene(frame_img: numpy.ndarray, frame_num: int):
112112
CutList = List[FrameTimecode]
113113
"""Type hint for a list of cuts, where each timecode represents the first frame of a new shot."""
114114

115+
CropRegion = Tuple[int, int, int, int]
116+
"""Type hint for rectangle of the form X0 Y0 X1 Y1 for cropping frames. Coordinates are relative
117+
to source frame without downscaling.
118+
"""
119+
115120
# TODO: This value can and should be tuned for performance improvements as much as possible,
116121
# until accuracy falls, on a large enough dataset. This has yet to be done, but the current
117122
# value doesn't seem to have caused any issues at least.
@@ -143,7 +148,7 @@ class Interpolation(Enum):
143148
"""Lanczos interpolation over 8x8 neighborhood."""
144149

145150

146-
def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MIN_WIDTH) -> int:
151+
def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MIN_WIDTH) -> float:
147152
"""Get the optimal default downscale factor based on a video's resolution (currently only
148153
the width in pixels is considered).
149154
@@ -157,10 +162,10 @@ def compute_downscale_factor(frame_width: int, effective_width: int = DEFAULT_MI
157162
Returns:
158163
float: The default downscale factor to use to achieve at least the target effective_width (1 if frame_width is already smaller than effective_width).
159164
"""
160-
assert not (frame_width < 1 or effective_width < 1)
165+
assert frame_width > 0 and effective_width > 0
161166
if frame_width < effective_width:
162167
return 1
163-
return frame_width // effective_width
168+
return frame_width / float(effective_width)
164169

165170

166171
def get_scenes_from_cuts(
@@ -651,6 +656,7 @@ def __init__(
651656

652657
self._frame_buffer = []
653658
self._frame_buffer_size = 0
659+
self._crop = None
654660

655661
@property
656662
def interpolation(self) -> Interpolation:
@@ -666,6 +672,35 @@ def stats_manager(self) -> Optional[StatsManager]:
666672
"""Getter for the StatsManager associated with this SceneManager, if any."""
667673
return self._stats_manager
668674

675+
@property
676+
def crop(self) -> Optional[CropRegion]:
677+
"""Portion of the frame to crop. Tuple of 4 ints in the form (X0, Y0, X1, Y1) where X0, Y0
678+
describes one corner and X1, Y1 the opposite corner of a rectangle inside of the frame.
679+
Coordinates start from 0 and are inclusive. For example, with a 100x100 pixel video,
680+
(0, 0, 99, 99) covers the entire frame."""
681+
if self._crop is None:
682+
return None
683+
(x0, y0, x1, y1) = self._crop
684+
return (x0, y0, x1 - 1, y1 - 1)
685+
686+
@crop.setter
687+
def crop(self, value: CropRegion):
688+
"""Raises:
689+
ValueError: All coordinates must be >= 0.
690+
"""
691+
if value is None:
692+
self._crop = None
693+
return
694+
if not (len(value) == 4 and all(isinstance(v, int) for v in value)):
695+
raise TypeError("crop region must be tuple of 4 ints")
696+
# Reject negative coordinates. Whether the region actually overlaps the frame is checked later, once the video size is known.
697+
if any(coordinate < 0 for coordinate in value):
698+
raise ValueError("crop coordinates must be >= 0")
699+
(x0, y0, x1, y1) = value
700+
# Internally we store the value in the form used to de-reference the image, which must be
701+
# one-past the end.
702+
self._crop = (x0, y0, x1 + 1, y1 + 1)
703+
669704
@property
670705
def downscale(self) -> int:
671706
"""Factor to downscale each frame by. Will always be >= 1, where 1
@@ -892,6 +927,33 @@ def detect_scenes(
892927
if end_time is not None and isinstance(end_time, (int, float)) and end_time < 0:
893928
raise ValueError("end_time must be greater than or equal to 0!")
894929

930+
effective_frame_size = video.frame_size
931+
if self._crop:
932+
logger.debug(f"Crop set: {self.crop}")
933+
x0, y0, x1, y1 = self._crop
934+
min_x, min_y = (min(x0, x1), min(y0, y1))
935+
max_x, max_y = (max(x0, x1), max(y0, y1))
936+
frame_width, frame_height = video.frame_size
937+
if min_x >= frame_width or min_y >= frame_height:
938+
raise ValueError("crop starts outside video boundary")
939+
if max_x >= frame_width or max_y >= frame_height:
940+
logger.warning("Warning: crop ends outside of video boundary.")
941+
effective_frame_size = (
942+
1 + min(max_x, frame_width) - min_x,
943+
1 + min(max_y, frame_height) - min_y,
944+
)
945+
# Calculate downscale factor and log effective resolution.
946+
if self.auto_downscale:
947+
downscale_factor = compute_downscale_factor(max(effective_frame_size))
948+
else:
949+
downscale_factor = self.downscale
950+
logger.debug(
951+
"Processing resolution: %d x %d, downscale: %1.1f",
952+
int(effective_frame_size[0] / downscale_factor),
953+
int(effective_frame_size[1] / downscale_factor),
954+
downscale_factor,
955+
)
956+
895957
self._base_timecode = video.base_timecode
896958

897959
# TODO: Figure out a better solution for communicating framerate to StatsManager.
@@ -911,19 +973,6 @@ def detect_scenes(
911973
else:
912974
total_frames = video.duration.get_frames() - start_frame_num
913975

914-
# Calculate the desired downscale factor and log the effective resolution.
915-
if self.auto_downscale:
916-
downscale_factor = compute_downscale_factor(frame_width=video.frame_size[0])
917-
else:
918-
downscale_factor = self.downscale
919-
if downscale_factor > 1:
920-
logger.info(
921-
"Downscale factor set to %d, effective resolution: %d x %d",
922-
downscale_factor,
923-
video.frame_size[0] // downscale_factor,
924-
video.frame_size[1] // downscale_factor,
925-
)
926-
927976
progress_bar = None
928977
if show_progress:
929978
progress_bar = tqdm(
@@ -980,7 +1029,7 @@ def _decode_thread(
9801029
self,
9811030
video: VideoStream,
9821031
frame_skip: int,
983-
downscale_factor: int,
1032+
downscale_factor: float,
9841033
end_time: FrameTimecode,
9851034
out_queue: queue.Queue,
9861035
):
@@ -1021,12 +1070,16 @@ def _decode_thread(
10211070
# Skip processing frames that have an incorrect size.
10221071
continue
10231072

1024-
if downscale_factor > 1:
1073+
if self._crop:
1074+
(x0, y0, x1, y1) = self._crop
1075+
frame_im = frame_im[y0:y1, x0:x1]
1076+
1077+
if downscale_factor > 1.0:
10251078
frame_im = cv2.resize(
10261079
frame_im,
10271080
(
1028-
round(frame_im.shape[1] / downscale_factor),
1029-
round(frame_im.shape[0] / downscale_factor),
1081+
max(1, round(frame_im.shape[1] / downscale_factor)),
1082+
max(1, round(frame_im.shape[0] / downscale_factor)),
10301083
),
10311084
interpolation=self._interpolation.value,
10321085
)

tests/test_cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ def test_cli_default_detector():
115115
assert invoke_scenedetect("-i {VIDEO} time {TIME}", config_file=None) == 0
116116

117117

118+
def test_cli_crop():
119+
"""Test --crop functionality."""
120+
assert invoke_scenedetect("-i {VIDEO} --crop 0 0 256 256 time {TIME}", config_file=None) == 0
121+
122+
118123
@pytest.mark.parametrize("info_command", ["help", "about", "version"])
119124
def test_cli_info_command(info_command):
120125
"""Test `scenedetect` info commands (e.g. help, about)."""

tests/test_scene_manager.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import os.path
2121
from typing import List
2222

23+
import pytest
24+
2325
from scenedetect.backends.opencv import VideoStreamCv2
2426
from scenedetect.detectors import AdaptiveDetector, ContentDetector
2527
from scenedetect.frame_timecode import FrameTimecode
@@ -255,3 +257,36 @@ def test_detect_scenes_callback_adaptive(test_video_file):
255257
scene_list = sm.get_scene_list()
256258
assert [start for start, end in scene_list] == TEST_VIDEO_START_FRAMES_ACTUAL
257259
assert fake_callback.scene_list == TEST_VIDEO_START_FRAMES_ACTUAL[1:]
260+
261+
262+
def test_detect_scenes_crop(test_video_file):
263+
video = VideoStreamCv2(test_video_file)
264+
sm = SceneManager()
265+
sm.crop = (10, 10, 1900, 1000)
266+
sm.add_detector(ContentDetector())
267+
268+
video_fps = video.frame_rate
269+
start_time = FrameTimecode("00:00:05", video_fps)
270+
end_time = FrameTimecode("00:00:15", video_fps)
271+
video.seek(start_time)
272+
sm.auto_downscale = True
273+
274+
_ = sm.detect_scenes(video=video, end_time=end_time)
275+
scene_list = sm.get_scene_list()
276+
assert [start for start, _ in scene_list] == TEST_VIDEO_START_FRAMES_ACTUAL
277+
278+
279+
def test_crop_invalid():
280+
sm = SceneManager()
281+
sm.crop = None
282+
sm.crop = (0, 0, 0, 0)
283+
sm.crop = (1, 1, 0, 0)
284+
sm.crop = (0, 0, 1, 1)
285+
with pytest.raises(TypeError):
286+
sm.crop = 1
287+
with pytest.raises(TypeError):
288+
sm.crop = (1, 1)
289+
with pytest.raises(TypeError):
290+
sm.crop = (1, 1, 1)
291+
with pytest.raises(ValueError):
292+
sm.crop = (1, 1, 1, -1)

website/pages/changelog.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,3 +588,6 @@ Development
588588
- [bugfix] Fix `ContentDetector` crash when using callbacks [#416](https://github.com/Breakthrough/PySceneDetect/issues/416) [#420](https://github.com/Breakthrough/PySceneDetect/issues/420)
589589
- [general] Timecodes of the form `MM:SS[.nnn]` are now processed correctly [#443](https://github.com/Breakthrough/PySceneDetect/issues/443)
590590
- [api] The `save_to_csv` function now works correctly with paths from the `pathlib` module
591+
- [feature] Add ability to crop input video before processing [#302](https://github.com/Breakthrough/PySceneDetect/issues/302) [#449](https://github.com/Breakthrough/PySceneDetect/issues/449)
592+
- [cli] Add `--crop` option to `scenedetect` command and config file to crop video frames before scene detection
593+
- [api] Add `crop` property to `SceneManager` to crop video frames before scene detection

0 commit comments

Comments
 (0)