Commit 869fe09

Author: arch
Commit message: improve optical flow
1 parent 8a534ec · commit 869fe09

File tree

2 files changed: +154, -33 lines


funscript_editor/algorithms/opticalflow.py

Lines changed: 54 additions & 33 deletions
@@ -9,10 +9,10 @@

 from funscript_editor.data.ffmpegstream import FFmpegStream
 from dataclasses import dataclass
-from sklearn.decomposition import PCA
 from PyQt5 import QtCore
 from funscript_editor.algorithms.signal import Signal
 from funscript_editor.ui.opencvui import OpenCV_GUI, OpenCV_GUI_Parameters
+from funscript_editor.algorithms.ppca import PPCA

 import matplotlib.pyplot as plt

@@ -24,7 +24,7 @@ class OpticalFlowFunscriptGeneratorParameter:
     start_frame: int
     end_frame: int = -1 # default is video end (-1)
     skip_frames: int = 0
-    min_trajectory_len: int = 50
+    min_trajectory_len: int = 40
     feature_detect_interval: int = 10
     movement_filter: float = 10.0

@@ -59,9 +59,10 @@ def __init__(self,

     class OpticalFlowPyrLK:

-        def __init__(self, min_trajectory_len, feature_detect_interval):
+        def __init__(self, min_trajectory_len, feature_detect_interval, feature_area):
             self.min_trajectory_len = min_trajectory_len
             self.feature_detect_interval = feature_detect_interval
+            self.feature_area = feature_area
             self.trajectories = []
             self.frame_idx = 0
             self.prev_frame_gray = None
@@ -94,6 +95,7 @@ def update(self, frame_roi):
                 for trajectory, (x, y), good_flag in zip(self.trajectories, p1.reshape(-1, 2), good):
                     if not good_flag:
                         if len(trajectory) > self.min_trajectory_len:
+                            # print('add trajectory from', self.frame_idx - len(trajectory), 'to', self.frame_idx)
                             self.result.append({'end': self.frame_idx, 'trajectory': trajectory})
                         continue
                     trajectory.append((x, y))
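
Context for the hunk above: p1 and good are produced earlier in update(), in code this commit does not touch. In OpenCV's lk_track.py sample, which this tracker closely follows, they come from a forward-backward consistency check. A minimal sketch of that idiom (the lk_params values here are assumptions, not the repository's actual settings):

    import cv2
    import numpy as np

    # Hypothetical LK parameters; the real ones live elsewhere in opticalflow.py.
    lk_params = dict(winSize=(15, 15), maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

    def track_points(prev_gray, frame_gray, trajectories):
        # Track the newest point of every trajectory forward, then backward,
        # and keep only points whose round trip lands within one pixel.
        p0 = np.float32([t[-1] for t in trajectories]).reshape(-1, 1, 2)
        p1, _, _ = cv2.calcOpticalFlowPyrLK(prev_gray, frame_gray, p0, None, **lk_params)
        p0r, _, _ = cv2.calcOpticalFlowPyrLK(frame_gray, prev_gray, p1, None, **lk_params)
        good = abs(p0 - p0r).reshape(-1, 2).max(-1) < 1
        return p1, good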
@@ -103,11 +105,14 @@ def update(self, frame_roi):


            if len(self.trajectories) == 0 or self.frame_idx % self.feature_detect_interval == 0:
-                mask = np.zeros_like(frame_gray)
+                search_img = frame_gray[self.feature_area[1]:self.feature_area[1]+self.feature_area[3], self.feature_area[0]:self.feature_area[0]+self.feature_area[2]]
+                mask = np.zeros_like(search_img)
                 mask[:] = 255
-                p = cv2.goodFeaturesToTrack(frame_gray, mask = mask, **self.feature_params)
+                p = cv2.goodFeaturesToTrack(search_img, mask = mask, **self.feature_params)
                 if p is not None:
                     for x, y in np.float32(p).reshape(-1, 2):
+                        x += self.feature_area[0]
+                        y += self.feature_area[1]
                         if any(abs(t[-1][0] - x) < 3 and abs(t[-1][1] - y) < 3 for t in self.trajectories):
                             continue

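This hunk is the core of the change: corner features are now detected only inside the user-selected feature area and then shifted back into the coordinate frame of the search ROI, while tracking itself still runs on the full ROI. The same pattern in isolation (a hypothetical helper; area is given as x, y, w, h):

    import cv2
    import numpy as np

    # Hypothetical values; the real ones live in OpticalFlowPyrLK.feature_params.
    feature_params = dict(maxCorners=20, qualityLevel=0.3, minDistance=7, blockSize=7)

    def detect_features_in_area(frame_gray, area):
        # Crop the grayscale ROI to the feature area ...
        x, y, w, h = area
        search_img = frame_gray[y:y + h, x:x + w]
        p = cv2.goodFeaturesToTrack(search_img, mask=None, **feature_params)
        if p is None:
            return []
        # ... and translate the detected corners back to ROI coordinates.
        return [(px + x, py + y) for px, py in np.float32(p).reshape(-1, 2)]
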
@@ -135,8 +140,9 @@ def extract_movement(self, optical_flow_result, metric_idx = 1, filter_static_po
             zero_before = r['end'] - len(r['trajectory'])
             zero_after = optical_flow_result['meta']['last_idx'] - r['end']
             trajectory_min = min([item[metric_idx] for item in r['trajectory']])
-            y = [0 for _ in range(zero_before)] + [(r['trajectory'][i][metric_idx] - trajectory_min)**2 for i in range(len(r['trajectory']))] + [0 for _ in range(zero_after)]
-            if not filter_static_points or (max(y) - min(y)) > self.params.movement_filter:
+            action = [(r['trajectory'][i][metric_idx] - trajectory_min) for i in range(len(r['trajectory']))]
+            y = [None for _ in range(zero_before)] + action + [None for _ in range(zero_after)]
+            if not filter_static_points or (max(action) - min(action)) > self.params.movement_filter:
                 result.append(y)

         return result
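
The switch from zero padding to None padding is what makes the PPCA step below work: np.array(..., dtype=float) turns each None into NaN, which ppca.py treats as a missing observation instead of a fake measurement of 0 (the old squaring of the displacement is dropped as well). For example:

    import numpy as np

    # A trajectory that only existed during frames 2 and 3 of a five-frame clip:
    padded = [None, None, 3.0, 5.0, None]
    np.array(padded, dtype=float)            # -> array([nan, nan,  3.,  5., nan])
    np.isnan(np.array(padded, dtype=float))  # -> array([ True,  True, False, False,  True])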
@@ -145,13 +151,16 @@ def extract_movement(self, optical_flow_result, metric_idx = 1, filter_static_po
     def get_absolute_framenumber(self, frame_number: int) -> int:
         """ Get the absolute frame number

+        Note:
+            We have an offset of 1 because we use the first frame for init
+
         Args:
             frame_number (int): relative frame number

         Returns:
             int: absolute frame position
         """
-        return self.params.start_frame + frame_number
+        return self.params.start_frame + frame_number + 1


     def tracking(self) -> str:
@@ -175,14 +184,43 @@ def tracking(self) -> str:
         if first_frame is None:
             return "FFmpeg could not extract the first video frame"

-        roi = self.ui.bbox_selector(
-            first_frame,
+        preview_frame = copy.copy(first_frame)
+        search_roi = self.ui.bbox_selector(
+            preview_frame,
             "Select observe area of a single person",
         )

+        preview_frame = self.ui.draw_box_to_image(
+            preview_frame,
+            search_roi,
+            color=(0,255,0)
+        )
+
+        while True:
+            feature_roi = self.ui.bbox_selector(
+                preview_frame,
+                "Select feature area inside the observe area",
+            )
+
+            if feature_roi[0] > search_roi[0] \
+                    and feature_roi[1] > search_roi[1] \
+                    and feature_roi[0] + feature_roi[2] < search_roi[0] + search_roi[2] \
+                    and feature_roi[1] + feature_roi[3] < search_roi[1] + search_roi[3]:
+                break
+
+            self.logger.warning("Invalid feature area, the selection must lie inside the observe area")
+
+        feature_roi = [
+            feature_roi[0] - search_roi[0],
+            feature_roi[1] - search_roi[1],
+            feature_roi[2],
+            feature_roi[3]
+        ]
+
         optical_flow = OpticalFlowFunscriptGeneratorThread.OpticalFlowPyrLK(
             min_trajectory_len = self.params.min_trajectory_len,
-            feature_detect_interval = self.params.feature_detect_interval
+            feature_detect_interval = self.params.feature_detect_interval,
+            feature_area = feature_roi
         )

         status = "End of video reached"
@@ -199,48 +237,31 @@ def tracking(self) -> str:
                 status = "Tracking stopped at existing action point"
                 break

-            frame_roi = frame[roi[1]:roi[1]+roi[3], roi[0]:roi[0]+roi[2], :]
+            frame_roi = frame[search_roi[1]:search_roi[1]+search_roi[3], search_roi[0]:search_roi[0]+search_roi[2], :]
             current_features = optical_flow.update(frame_roi)

             for f in current_features:
-                cv2.circle(frame, (int(roi[0]+f[0]), int(roi[1]+f[1])), 3, (0, 0, 255), -1)
+                cv2.circle(frame, (int(search_roi[0]+f[0]), int(search_roi[1]+f[1])), 3, (0, 0, 255), -1)

             key = self.ui.preview(
                 frame,
                 frame_num + self.params.start_frame,
                 texte = ["Press 'q' to stop tracking"],
-                boxes = [roi],
+                boxes = [search_roi],
             )

             if self.ui.was_key_pressed('q') or key == ord('q'):
                 status = 'Tracking stopped by user'
                 break

         result = optical_flow.get_result()
-
-        # for filter_static_points in [True, False]:
-        #     test = self.extract_movement(result, filter_static_points=filter_static_points)
-        #     for i in [1, 2, 3, 4]:
-        #         pca = PCA(n_components=i)
-        #         principalComponents = pca.fit_transform(np.transpose(np.array(test)))
-        #         test_result = np.array(principalComponents)
-        #         plt.plot(test_result)
-        #         plt.savefig('debug_{}_{}.png'.format(filter_static_points, i), dpi=400)
-        #         plt.close()
-
         result = self.extract_movement(result)

-        pca = PCA(n_components=1)
-        principalComponents = pca.fit_transform(np.transpose(np.array(result)))
-        result = [x[0] for x in principalComponents]
-
-        # option for pca 2 with two moving persons:
-        # result = np.transpose(np.array(principalComponents))
-        # result = np.array(result[0]) - np.array(result[1])
+        _, _, _, principalComponents, _ = PPCA(np.transpose(np.array(result, dtype=float)), d=1)
+        result = [x[0] for x in principalComponents.tolist()]

         signal = Signal(self.video_info.fps)
         points = signal.get_local_min_max_points(result)
-        # points = signal.get_direction_changes(result, filter_len=4)
         categorized_points = signal.categorize_points(result, points)

         for k in self.funscripts:
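
For orientation: extract_movement returns one padded list per trajectory, so np.array(result, dtype=float) has shape (trajectories, frames) and the transpose handed to PPCA is (frames, trajectories), i.e. N samples in D dimensions in the notation of ppca.py below. A toy-sized sketch of this reduction step (numbers invented for illustration):

    import numpy as np
    from funscript_editor.algorithms.ppca import PPCA

    # Three trajectories observed over six frames; None marks frames where a
    # trajectory did not exist yet or had already ended.
    result = [
        [10.0, 12.0, 15.0, 12.0, 10.0, None],
        [None,  6.0,  8.0,  6.0,  5.0,  5.0],
        [ 9.0, 11.0, 14.0, 11.0, None, None],
    ]

    # Rows become frames (samples), columns become trajectories (features).
    Y = np.transpose(np.array(result, dtype=float))
    _, _, _, X, _ = PPCA(Y, d=1)      # X: expected states, one row per frame
    movement_1d = [x[0] for x in X.tolist()]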
funscript_editor/algorithms/ppca.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
+# -*- coding: utf-8 -*-
+"""
+Python implementation of PPCA-EM for data with missing values
+Adapted from the MATLAB implementation by J.J. VerBeek
+"""
+
+import numpy as np
+from numpy import shape, isnan, nanmean, average, log, cov
+from numpy.matlib import repmat
+from numpy.random import normal
+from numpy.linalg import inv, det, eig
+from numpy import identity as eye
+from numpy import trace as tr
+from scipy.linalg import orth
+
+
+def PPCA(Y, d):
+    """
+    Implements probabilistic PCA for data with missing values,
+    using a factorizing distribution over hidden states and hidden observations.
+
+    Args:
+        Y (np.ndarray): input numpy ndarray of data vectors (N by D)
+        d (int): dimension of latent space
+
+    Returns:
+        C (D by d): C*C' + I*ss is the covariance model, C has scaled principal directions as columns
+        ss (float): isotropic variance outside subspace
+        M (D by 1): data mean
+        X (N by d): expected states
+        Ye (N by D): expected complete observations (differs from Y if data is missing)
+
+    Based on MATLAB code from J.J. VerBeek, 2006. http://lear.inrialpes.fr/~verbeek
+    """
+    N, D = shape(Y)  # N observations in D dimensions (i.e. D is the number of features, N the number of samples)
+    threshold = 1E-4  # minimal relative change in the objective function to continue
+    hidden = isnan(Y)
+    missing = hidden.sum()
+
+    if missing > 0:
+        M = nanmean(Y, axis=0)
+    else:
+        M = average(Y, axis=0)
+
+    Ye = Y - repmat(M, N, 1)
+
+    if missing > 0:
+        Ye[hidden] = 0
+
+    # initialize
+    C = normal(loc=0.0, scale=1.0, size=(D, d))
+    CtC = C.T @ C
+    X = Ye @ C @ inv(CtC)
+    recon = X @ C.T
+    recon[hidden] = 0
+    ss = np.sum((recon - Ye) ** 2) / (N * D - missing)
+
+    count = 1
+    old = np.inf
+
+    # EM iterations
+    while (count):
+        Sx = inv(eye(d) + CtC / ss)  # E-step, covariances
+        ss_old = ss
+        if missing > 0:
+            proj = X @ C.T
+            Ye[hidden] = proj[hidden]
+
+        X = Ye @ C @ Sx / ss  # E-step: expected values
+
+        SumXtX = X.T @ X  # M-step
+        C = Ye.T @ X @ (SumXtX + N * Sx).T @ inv(((SumXtX + N * Sx) @ (SumXtX + N * Sx).T))
+        CtC = C.T @ C
+        ss = (np.sum((X @ C.T - Ye) ** 2) + N * np.sum(CtC * Sx) + missing * ss_old) / (N * D)
+        # transform the Sx determinant into a numpy longdouble in order to deal with high dimensionality
+        Sx_det = np.min(Sx).astype(np.longdouble) ** shape(Sx)[0] * det(Sx / np.min(Sx))
+        objective = N * D + N * (D * log(ss) + tr(Sx) - log(Sx_det)) + tr(SumXtX) - missing * log(ss_old)
+
+        rel_ch = np.abs(1 - objective / old)
+        old = objective
+
+        count = count + 1
+        if rel_ch < threshold and count > 5:
+            count = 0
+
+    C = orth(C)
+    covM = cov((Ye @ C).T)
+    if d == 1:
+        covM = [[covM]]
+    vals, vecs = eig(covM)
+    ordr = np.argsort(vals)[::-1]
+    vecs = vecs[:, ordr]
+
+    C = C @ vecs
+    X = Ye @ C
+
+    # add the data mean back to the expected complete data
+    Ye = Ye + repmat(M, N, 1)
+
+    return C, ss, M, X, Ye
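
A quick smoke test for the new module: on complete data (no NaNs) the recovered one-dimensional subspace should agree with the top right-singular vector of the centered data up to sign. This is a sketch under the assumption that EM converges on such an easy problem; it is not part of the commit:

    import numpy as np
    from funscript_editor.algorithms.ppca import PPCA

    rng = np.random.default_rng(0)
    # 200 samples that vary mostly along one direction in 3-D, plus small noise.
    Y = rng.normal(size=(200, 1)) @ np.array([[2.0, 1.0, 0.5]]) \
        + 0.05 * rng.normal(size=(200, 3))

    C, ss, M, X, Ye = PPCA(Y, d=1)

    _, _, Vt = np.linalg.svd(Y - Y.mean(axis=0), full_matrices=False)
    print(abs(float(C[:, 0] @ Vt[0])))  # |cosine| between directions, close to 1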
