diff --git a/dvr_scan/scanner.py b/dvr_scan/scanner.py
index ea1c7b1..6e2fb62 100644
--- a/dvr_scan/scanner.py
+++ b/dvr_scan/scanner.py
@@ -35,7 +35,7 @@
 from dvr_scan.overlays import BoundingBoxOverlay, TextOverlay
 from dvr_scan.platform import HAS_TKINTER, get_filename, get_min_screen_bounds, is_ffmpeg_available
 from dvr_scan.region import Point, Size, bound_point, load_regions
-from dvr_scan.subtractor import SubtractorCNT, SubtractorCudaMOG2, SubtractorMOG2
+from dvr_scan.subtractor import Subtractor, SubtractorCNT, SubtractorCudaMOG2, SubtractorMOG2
 from dvr_scan.video_joiner import VideoJoiner
 
 if HAS_TKINTER:
@@ -216,7 +216,7 @@ class MotionScanner:
 
     def __init__(
         self,
-        input_videos: List[AnyStr],
+        input_videos: Union[List[AnyStr], VideoJoiner],
         frame_skip: int = 0,
         show_progress: bool = False,
         debug_mode: bool = False,
@@ -236,7 +236,7 @@ def __init__(
 
         # Scan state and options they come from:
 
-        # Output Parameters (set_output)
+        # Output Parameters
         self._comp_file: Optional[AnyStr] = None  # -o/--output
         self._mask_file: Optional[AnyStr] = None  # -mo/--mask-output
         self._fourcc: Any = None  # opencv-codec
@@ -247,27 +247,24 @@ def __init__(
         # TODO: Replace uses of self._output_dir with
         # a helper function called "get_output_path".
 
-        # Overlay Parameters (set_overlays)
+        # Overlay Parameters
         self._timecode_overlay = None  # -tc/--time-code, None or TextOverlay
         self._metrics_overlay = None  # -fm/--frame-metrics, None or TextOverlay
         self._bounding_box = None  # -bb/--bounding-box, None or BoundingBoxOverlay
 
-        # Motion Detection Parameters (set_detection_params)
-        self._subtractor_type = DetectorType.MOG2  # -b/--bg-subtractor
+        # Motion Detection Parameters
         self._threshold = 0.15  # -t/--threshold
-        self._variance_threshold = 16.0  # variance-threshold
-        self._kernel_size = None  # -k/--kernel-size
-        self._downscale_factor = 1  # -df/--downscale-factor
-        self._learning_rate = -1  # learning-rate
         self._max_threshold = 255.0  # max-threshold
+        self._subtractor: Optional[Subtractor] = None
 
-        # Motion Event Parameters (set_event_params)
+        # Motion Event Parameters
         self._min_event_len = None  # -l/--min-event-length
-        self._pre_event_len = None  # -tb/--time-before-event
-        self._post_event_len = None  # -tp/--time-post-event
+        self._min_event_dist = None  # --min-event-dist  # TODO: Implement this to fix #72.
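+        # Renamed from _pre_event_len/_post_event_len to match the -tb/-tp flags they map to.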
+        self._time_before_event = None  # -tb/--time-before-event
+        self._time_post_event = None  # -tp/--time-post-event
         self._use_pts = None  # --use_pts
 
-        # Region Parameters (set_region)
+        # Region Parameters
         self._region_editor = False  # -w/--region-window
         self._regions: List[List[Point]] = []  # -a/--add-region, -w/--region-window
         self._load_region: Optional[str] = None  # -R/--load-region
@@ -276,8 +273,10 @@ def __init__(
         self._show_roi_window_deprecated = False
         self._roi_deprecated = None
 
-        # Input Video Parameters (set_video_time)
-        self._input: VideoJoiner = VideoJoiner(input_videos)  # -i/--input
+        # Input Video Parameters
+        self._input: VideoJoiner = (  # -i/--input
+            input_videos if isinstance(input_videos, VideoJoiner) else VideoJoiner(input_videos)
+        )
         self._frame_skip: int = frame_skip  # -fs/--frame-skip
         self._start_time: FrameTimecode = None  # -st/--start-time
         self._end_time: FrameTimecode = None  # -et/--end-time
@@ -291,7 +290,7 @@ def __init__(
         self._mask_writer: Optional[cv2.VideoWriter] = None
         self._num_events: int = 0
 
-        # Thumbnail production (set_thumbnail_params)
+        # Thumbnail production
         self._thumbnails = None
         self._highscore = 0
         self._highframe = None
@@ -409,6 +408,7 @@ def set_detection_params(
         learning_rate: float = -1,
     ):
         """Set detection parameters."""
+
         self._threshold = threshold
         self._max_threshold = max_threshold
         self._subtractor_type = detector_type
@@ -427,6 +427,35 @@ def set_detection_params(
         # We should also investigate how this works for CNT and other subtractors.
         self._learning_rate = learning_rate
 
+        # Calculate size of noise reduction kernel. Even if an ROI is set, the auto factor is
+        # set based on the original video's input resolution.
+        # TODO(#194): We should probably not scale the kernel size if the user set it. They can
+        # adjust it for the downscale factor manually; doing it without warning is unintuitive.
+
+        kernel_size = (
+            _scale_kernel_size(self._kernel_size, self._downscale_factor)
+            if self._kernel_size != -1
+            else _recommended_kernel_size(self._input.resolution[0], self._downscale_factor)
+        )
+        # Create background subtractor from parameters.
+        SubtractorType = self._subtractor_type.value
+        self._subtractor = SubtractorType(
+            # TODO(v1.7): Don't set or log unused parameter variance_threshold if CNT is used.
+            variance_threshold=self._variance_threshold,
+            kernel_size=kernel_size,
+            learning_rate=self._learning_rate,
+        )
+
+        logger.info(
+            "Using subtractor %s with kernel_size = %s%s, "
+            "variance_threshold = %s and learning_rate = %s",
+            self._subtractor_type.name,
+            str(kernel_size) if kernel_size else "off",
+            " (auto)" if self._kernel_size == -1 else "",
+            str(self._variance_threshold) if self._variance_threshold != 16.0 else "auto",
+            str(self._learning_rate) if self._learning_rate != -1 else "auto",
+        )
+
     def set_regions(
         self,
         region_editor: bool = False,
@@ -471,8 +500,8 @@ def set_event_params(
         """Set motion event parameters."""
         assert self._input.framerate is not None
         self._min_event_len = FrameTimecode(min_event_len, self._input.framerate)
-        self._pre_event_len = FrameTimecode(time_pre_event, self._input.framerate)
-        self._post_event_len = FrameTimecode(time_post_event, self._input.framerate)
+        self._time_before_event = FrameTimecode(time_pre_event, self._input.framerate)
+        self._time_post_event = FrameTimecode(time_post_event, self._input.framerate)
         self._use_pts = use_pts
 
     def set_thumbnail_params(self, thumbnails: str = None):
@@ -659,42 +688,17 @@ def scan(self) -> Optional[DetectionResult]:
                 logger.info("Exiting...")
                 return None
 
-        if self._kernel_size == -1:
-            # Calculate size of noise reduction kernel. Even if an ROI is set, the auto factor is
-            # set based on the original video's input resolution.
-            kernel_size = _recommended_kernel_size(
-                self._input.resolution[0], self._downscale_factor
-            )
-        else:
-            kernel_size = _scale_kernel_size(self._kernel_size, self._downscale_factor)
-
-        # Create background subtractor and motion detector.
-        # TODO(v1.7): Don't set or log unused parameter variance_threshold
-        # if CNT is used.
-        detector = MotionDetector(
-            subtractor=self._subtractor_type.value(
-                variance_threshold=self._variance_threshold,
-                kernel_size=kernel_size,
-                learning_rate=self._learning_rate,
-            ),
+        # Create motion detector.
+        self._detector = MotionDetector(
+            subtractor=self._subtractor,
             frame_size=self._input.resolution,
             downscale=self._downscale_factor,
             regions=self._regions,
         )
-        logger.info(
-            "Using subtractor %s with kernel_size = %s%s, "
-            "variance_threshold = %s and learning_rate = %s",
-            self._subtractor_type.name,
-            str(kernel_size) if kernel_size else "off",
-            " (auto)" if self._kernel_size == -1 else "",
-            str(self._variance_threshold) if self._variance_threshold != 16.0 else "auto",
-            str(self._learning_rate) if self._learning_rate != -1 else "auto",
-        )
-
         # Correct event length parameters to account frame skip.
-        post_event_len: int = self._post_event_len.frame_num // (self._frame_skip + 1)
-        pre_event_len: int = self._pre_event_len.frame_num // (self._frame_skip + 1)
+        post_event_len: int = self._time_post_event.frame_num // (self._frame_skip + 1)
+        pre_event_len: int = self._time_before_event.frame_num // (self._frame_skip + 1)
         min_event_len: int = max(self._min_event_len.frame_num // (self._frame_skip + 1), 1)
 
         # Calculations below rely on min_event_len always being >= 1 (cannot be zero)
@@ -705,26 +709,24 @@ def scan(self) -> Optional[DetectionResult]:
         # need to compensate for rounding errors when we corrected it for frame skip. This is
         # important as this affects the number of frames we consider for the actual motion event.
         if not self._use_pts:
-            start_event_shift: int = self._pre_event_len.frame_num + min_event_len * (
+            start_event_shift: int = self._time_before_event.frame_num + min_event_len * (
                 self._frame_skip + 1
             )
         else:
             start_event_shift_ms: float = (
-                self._pre_event_len.get_seconds() + self._min_event_len.get_seconds()
+                self._time_before_event.get_seconds() + self._min_event_len.get_seconds()
             ) * 1000
 
         # Length of buffer we require in memory to keep track of all frames required for -l and -tb.
         buff_len = pre_event_len + min_event_len
         event_end = self._input.position
-        if not self._use_pts:
-            last_frame_above_threshold = 0
-        else:
-            last_frame_above_threshold_ms = 0
+        last_frame_above_threshold = 0
+        last_frame_above_threshold_ms = 0
+        buffered_frames = []  # Frames held back past the post-event window (see encode path below).
 
         if self._bounding_box:
             self._bounding_box.set_corrections(
                 downscale_factor=self._downscale_factor,
-                shift=(detector.area[0].x, detector.area[0].y),
+                shift=(self._detector.area[0].x, self._detector.area[0].y),
                 frame_skip=self._frame_skip,
             )
@@ -780,11 +782,11 @@ def scan(self) -> Optional[DetectionResult]:
             if frame_size != self._input.resolution:
                 time = frame.timecode
                 video_res = self._input.resolution
-                logger.warn(
+                logger.warning(
                     f"WARNING: Frame {time.frame_num} [{time.get_timecode()}] has unexpected size: "
                     f"{frame_size[0]}x{frame_size[1]}, expected {video_res[0]}x{video_res[1]}"
                 )
-            result = detector.update(frame.frame_bgr)
+            result = self._detector.update(frame.frame_bgr)
             frame_score = result.score
             # TODO(1.7): Allow disabling the rejection filter or customizing amount of
             # consecutive frames it will ignore.
@@ -825,6 +827,11 @@ def scan(self) -> Optional[DetectionResult]:
                     )
                 )
 
+            #
+            # TODO: Make a small state diagram and create a new state enum to handle different
+            # merging modes, etc.
+            #
+
             # Last frame was part of a motion event, or still within the post-event window.
             if in_motion_event:
                 # If this frame still has motion, reset the post-event window.
@@ -839,67 +846,39 @@ def scan(self) -> Optional[DetectionResult]:
                 #
                 # TODO(#72): We should wait until the max of *both* the pre-event and post-
                 # event windows have passed. Right now we just consider the post-event window.
+                # We should also allow configuring overlap behavior:
+                # - normal: If any new motion is found within max(time_pre_event, time_post_event),
+                #   it will be merged with the preceding event.
+                # - extended: Events that have a gap of size (time_pre_event + time_post_event)
+                #   between each other will be merged.
                 else:
                     num_frames_post_event += 1
-                    if num_frames_post_event >= post_event_len:
+                    if num_frames_post_event >= (pre_event_len + post_event_len):
                         in_motion_event = False
-
-                        logger.debug(
-                            "event %d high score %f" % (1 + self._num_events, self._highscore)
+                        # TODO: We can't throw these frames away, they might be needed for the
+                        # next event to satisfy its own pre_event_len.
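+                        # NOTE: Anything accumulated in buffered_frames is dropped when the
+                        # event closes here; see the encode path below.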
+                        buffered_frames = []
+                        event = self._on_event_end(
+                            last_frame_above_threshold,
+                            last_frame_above_threshold_ms,
+                            event_start,
                         )
-                        if self._thumbnails == "highscore":
-                            video_name = get_filename(
-                                path=self._input.paths[0], include_extension=False
-                            )
-                            output_path = (
-                                self._comp_file
-                                if self._comp_file
-                                else OUTPUT_FILE_TEMPLATE.format(
-                                    VIDEO_NAME=video_name,
-                                    EVENT_NUMBER="%04d" % (1 + self._num_events),
-                                    EXTENSION="jpg",
-                                )
-                            )
-                            if self._output_dir:
-                                output_path = os.path.join(self._output_dir, output_path)
-                            cv2.imwrite(output_path, self._highframe)
-                            self._highscore = 0
-                            self._highframe = None
-
-                        # Calculate event end based on the last frame we had with motion plus
-                        # the post event length time. We also need to compensate for the number
-                        # of frames that we skipped that could have had motion.
-                        # We also add 1 to include the presentation duration of the last frame.
-                        if not self._use_pts:
-                            event_end = FrameTimecode(
-                                1
-                                + last_frame_above_threshold
-                                + self._post_event_len.frame_num
-                                + self._frame_skip,
-                                self._input.framerate,
-                            )
-                            assert event_end.frame_num >= event_start.frame_num
-                        else:
-                            event_end = FrameTimecode(
-                                (last_frame_above_threshold_ms / 1000)
-                                + self._post_event_len.get_seconds(),
-                                self._input.framerate,
-                            )
-                            assert event_end.get_seconds() >= event_start.get_seconds()
-                        event_list.append(MotionEvent(start=event_start, end=event_end))
+                        event_list.append(event)
                         if self._output_mode != OutputMode.SCAN_ONLY:
-                            encode_queue.put(MotionEvent(start=event_start, end=event_end))
+                            encode_queue.put(event)
-
             # Send frame to encode thread.
             if in_motion_event and self._output_mode == OutputMode.OPENCV:
-                encode_queue.put(
-                    EncodeFrameEvent(
-                        frame_bgr=frame.frame_bgr,
-                        timecode=frame.timecode,
-                        bounding_box=bounding_box,
-                        score=frame_score,
-                    )
+                encode_frame = EncodeFrameEvent(
+                    frame_bgr=frame.frame_bgr,
+                    timecode=frame.timecode,
+                    bounding_box=bounding_box,
+                    score=frame_score,
                 )
+                if num_frames_post_event < post_event_len:
+                    encode_queue.put(encode_frame)
+                else:
+                    buffered_frames.append(encode_frame)
+
             # Not already in a motion event, look for a new one.
             else:
                 # Buffer the required amount of frames and overlay data until we find an event.
@@ -1017,6 +996,48 @@ def scan(self) -> Optional[DetectionResult]:
 
         return DetectionResult(event_list, frames_processed)
 
+    def _on_event_end(
+        self,
+        last_frame_above_threshold,
+        last_frame_above_threshold_ms,
+        event_start,
+    ) -> MotionEvent:
+        """Handle the end of a motion event: write the highscore thumbnail if one was
+        requested, and calculate the final timespan of the event."""
+        logger.debug("event %d high score %f" % (1 + self._num_events, self._highscore))
+        if self._thumbnails == "highscore":
+            video_name = get_filename(path=self._input.paths[0], include_extension=False)
+            output_path = (
+                self._comp_file
+                if self._comp_file
+                else OUTPUT_FILE_TEMPLATE.format(
+                    VIDEO_NAME=video_name,
+                    EVENT_NUMBER="%04d" % (1 + self._num_events),
+                    EXTENSION="jpg",
+                )
+            )
+            if self._output_dir:
+                output_path = os.path.join(self._output_dir, output_path)
+            cv2.imwrite(output_path, self._highframe)
+            self._highscore = 0
+            self._highframe = None
+
+        # Calculate event end based on the last frame we had with motion plus
+        # the post event length time. We also need to compensate for the number
+        # of frames that we skipped that could have had motion.
+        # We also add 1 to include the presentation duration of the last frame.
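+        # For example, with frame_skip = 1 and a 10-frame post-event window, motion last
+        # seen on frame N yields event_end = N + 1 + 10 + 1 = N + 12.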
+        if not self._use_pts:
+            event_end = FrameTimecode(
+                1 + last_frame_above_threshold + self._time_post_event.frame_num + self._frame_skip,
+                self._input.framerate,
+            )
+            assert event_end.frame_num >= event_start.frame_num
+        else:
+            event_end = FrameTimecode(
+                (last_frame_above_threshold_ms / 1000) + self._time_post_event.get_seconds(),
+                self._input.framerate,
+            )
+            assert event_end.get_seconds() >= event_start.get_seconds()
+        return MotionEvent(start=event_start, end=event_end)
+
     def _decode_thread(self, decode_queue: queue.Queue):
         try:
             while not self._stop.is_set():
diff --git a/dvr_scan/video_joiner.py b/dvr_scan/video_joiner.py
index ce6f878..e1c1ed5 100644
--- a/dvr_scan/video_joiner.py
+++ b/dvr_scan/video_joiner.py
@@ -29,7 +29,6 @@
 
 logger = logging.getLogger("dvr_scan")
 
-# TODO: Replace this with the equivalent from PySceneDetect when available.
 class VideoJoiner:
     """Handles concatenating multiple videos together.
 
@@ -69,7 +68,7 @@ def framerate(self) -> float:
         return self._cap.frame_rate
 
     @property
-    def total_frames(self) -> float:
+    def total_frames(self) -> int:
         """Total number of frames of all input videos combined. May be inaccurate."""
         return self._total_frames
 
diff --git a/tests/test_scan_context.py b/tests/test_scan_context.py
index 7e6e5f6..ace37af 100644
--- a/tests/test_scan_context.py
+++ b/tests/test_scan_context.py
@@ -16,11 +16,14 @@
 import platform
 import typing as ty
 
+import numpy as np
 import pytest
+from scenedetect import FrameTimecode
 
 from dvr_scan.region import Point
-from dvr_scan.scanner import DetectorType, MotionScanner
-from dvr_scan.subtractor import SubtractorCNT, SubtractorCudaMOG2
+from dvr_scan.scanner import DetectorType, MotionEvent, MotionScanner
+from dvr_scan.subtractor import Subtractor, SubtractorCNT, SubtractorCudaMOG2
+from dvr_scan.video_joiner import VideoJoiner
 
 MACHINE_ARCH = platform.machine().upper()
 
@@ -237,3 +240,109 @@ def test_start_duration(traffic_camera_video):
     event_list = [(event.start.frame_num, event.end.frame_num) for event in event_list]
     # The set duration should only cover the middle event.
     compare_event_lists(event_list, TRAFFIC_CAMERA_EVENTS[1:2], EVENT_FRAME_TOLERANCE)
+
+
+TRAFFIC_CAMERA_EVENTS_MERGE_WITHIN_TIME_BEFORE = [
+    (2, 149),
+    (306, 576),
+]
+
+
+def test_merge_within_time_before(traffic_camera_video):
+    """Test that events closer together than time_pre_event are merged into one."""
+    scanner = MotionScanner([traffic_camera_video])
+    scanner.set_regions(regions=[TRAFFIC_CAMERA_ROI])
+    scanner.set_event_params(min_event_len=4, time_pre_event=52)
+    event_list = scanner.scan().event_list
+    event_list = [(event.start.frame_num, event.end.frame_num) for event in event_list]
+    compare_event_lists(
+        event_list, TRAFFIC_CAMERA_EVENTS_MERGE_WITHIN_TIME_BEFORE, EVENT_FRAME_TOLERANCE
+    )
+
+
+class FakeVideo(VideoJoiner):
+    def __init__(self):
+        self._position = FrameTimecode(0, fps=self.framerate)
+
+    @property
+    def paths(self):
+        return ["fake_path.mp4"]
+
+    @property
+    def resolution(self):
+        return (1, 1)
+
+    @property
+    def framerate(self) -> float:
+        return 1.0
+
+    @property
+    def total_frames(self) -> int:
+        return 1000
+
+    @property
+    def decode_failures(self) -> float:
+        return 0
+
+    @property
+    def position(self) -> FrameTimecode:
+        return self._position + 1
+
+    @property
+    def position_ms(self) -> float:
+        return self._position.get_seconds() * 1000.0
+
+    def read(self, decode: bool = True) -> ty.Optional[np.ndarray]:
+        if self._position.get_frames() >= self.total_frames:
+            return None
+        img = np.zeros((self.resolution[1], self.resolution[0], 3), dtype=np.uint8)
+        self._position += 1
+        return img
+
+    def seek(self, target: FrameTimecode):
+        pass
+
+
+def test_fake_video():
+    # With the default subtractor there won't be any motion, since all frames are empty.
+    scanner = MotionScanner(FakeVideo())
+    assert scanner.scan().event_list == []
+
+
+# A fake subtractor we control to produce specific frame scores, for testing boundary and
+# event-merging behavior.
+class FakeSubtractor(Subtractor):
+    def __init__(self, events: ty.List[MotionEvent]):
+        self._frame_num = 0
+        assert events
+        self._events = events
+        self._curr_event = 0
+
+    def apply(self, frame: np.ndarray) -> np.ndarray:
+        self._frame_num += 1
+        frame = np.copy(frame[:, :, 0])
+        if self._curr_event >= len(self._events):
+            return frame
+        if self._frame_num > self._events[self._curr_event].end:
+            self._curr_event += 1
+            return frame
+        if self._frame_num > self._events[self._curr_event].start:
+            return np.add(frame, 254)  # Scores of 255 are rejected by default.
+        return frame
+
+    @staticmethod
+    def is_available():
+        return True
+
+
+def test_fake_subtractor():
+    scanner = MotionScanner(FakeVideo())
+    base_time = FrameTimecode(0, scanner._input.framerate)
+    expected_events = [MotionEvent(start=(base_time + 100), end=(base_time + 999))]
+    scanner._subtractor = FakeSubtractor(events=expected_events)
+    # TODO(#72): Ideally this should match expected_events; figure out why it doesn't.
+    assert scanner.scan().event_list == [
+        MotionEvent(start=(base_time + 99), end=(base_time + 1001))
+    ]