# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
# File: gmc.py

from __future__ import annotations

import copy

import cv2
import numpy as np

from ultralytics.utils import LOGGER
  6. class GMC:
  7. """
  8. Generalized Motion Compensation (GMC) class for tracking and object detection in video frames.
  9. This class provides methods for tracking and detecting objects based on several tracking algorithms including ORB,
  10. SIFT, ECC, and Sparse Optical Flow. It also supports downscaling of frames for computational efficiency.
  11. Attributes:
  12. method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
  13. downscale (int): Factor by which to downscale the frames for processing.
  14. prevFrame (np.ndarray): Stores the previous frame for tracking.
  15. prevKeyPoints (List): Stores the keypoints from the previous frame.
  16. prevDescriptors (np.ndarray): Stores the descriptors from the previous frame.
  17. initializedFirstFrame (bool): Flag to indicate if the first frame has been processed.
  18. Methods:
  19. __init__: Initializes a GMC object with the specified method and downscale factor.
  20. apply: Applies the chosen method to a raw frame and optionally uses provided detections.
  21. apply_ecc: Applies the ECC algorithm to a raw frame.
  22. apply_features: Applies feature-based methods like ORB or SIFT to a raw frame.
  23. apply_sparseoptflow: Applies the Sparse Optical Flow method to a raw frame.
  24. reset_params: Resets the internal parameters of the GMC object.
  25. Examples:
  26. Create a GMC object and apply it to a frame
  27. >>> gmc = GMC(method="sparseOptFlow", downscale=2)
  28. >>> frame = np.array([[1, 2, 3], [4, 5, 6]])
  29. >>> processed_frame = gmc.apply(frame)
  30. >>> print(processed_frame)
  31. array([[1, 2, 3],
  32. [4, 5, 6]])
  33. """
  34. def __init__(self, method: str = "sparseOptFlow", downscale: int = 2) -> None:
  35. """
  36. Initialize a Generalized Motion Compensation (GMC) object with tracking method and downscale factor.
  37. Args:
  38. method (str): The method used for tracking. Options include 'orb', 'sift', 'ecc', 'sparseOptFlow', 'none'.
  39. downscale (int): Downscale factor for processing frames.
  40. Examples:
  41. Initialize a GMC object with the 'sparseOptFlow' method and a downscale factor of 2
  42. >>> gmc = GMC(method="sparseOptFlow", downscale=2)
  43. """
  44. super().__init__()
  45. self.method = method
  46. self.downscale = max(1, downscale)
  47. if self.method == "orb":
  48. self.detector = cv2.FastFeatureDetector_create(20)
  49. self.extractor = cv2.ORB_create()
  50. self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
  51. elif self.method == "sift":
  52. self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
  53. self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)
  54. self.matcher = cv2.BFMatcher(cv2.NORM_L2)
  55. elif self.method == "ecc":
  56. number_of_iterations = 5000
  57. termination_eps = 1e-6
  58. self.warp_mode = cv2.MOTION_EUCLIDEAN
  59. self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)
  60. elif self.method == "sparseOptFlow":
  61. self.feature_params = dict(
  62. maxCorners=1000, qualityLevel=0.01, minDistance=1, blockSize=3, useHarrisDetector=False, k=0.04
  63. )
  64. elif self.method in {"none", "None", None}:
  65. self.method = None
  66. else:
  67. raise ValueError(f"Error: Unknown GMC method:{method}")
  68. self.prevFrame = None
  69. self.prevKeyPoints = None
  70. self.prevDescriptors = None
  71. self.initializedFirstFrame = False
  72. def apply(self, raw_frame: np.array, detections: list = None) -> np.array:
  73. """
  74. Apply object detection on a raw frame using the specified method.
  75. Args:
  76. raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
  77. detections (List | None): List of detections to be used in the processing.
  78. Returns:
  79. (np.ndarray): Processed frame with applied object detection.
  80. Examples:
  81. >>> gmc = GMC(method="sparseOptFlow")
  82. >>> raw_frame = np.random.rand(480, 640, 3)
  83. >>> processed_frame = gmc.apply(raw_frame)
  84. >>> print(processed_frame.shape)
  85. (480, 640, 3)
  86. """
  87. if self.method in {"orb", "sift"}:
  88. return self.apply_features(raw_frame, detections)
  89. elif self.method == "ecc":
  90. return self.apply_ecc(raw_frame)
  91. elif self.method == "sparseOptFlow":
  92. return self.apply_sparseoptflow(raw_frame)
  93. else:
  94. return np.eye(2, 3)
  95. def apply_ecc(self, raw_frame: np.array) -> np.array:
  96. """
  97. Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame for motion compensation.
  98. Args:
  99. raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
  100. Returns:
  101. (np.ndarray): The processed frame with the applied ECC transformation.
  102. Examples:
  103. >>> gmc = GMC(method="ecc")
  104. >>> processed_frame = gmc.apply_ecc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
  105. >>> print(processed_frame)
  106. [[1. 0. 0.]
  107. [0. 1. 0.]]
  108. """
  109. height, width, _ = raw_frame.shape
  110. frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
  111. H = np.eye(2, 3, dtype=np.float32)
  112. # Downscale image
  113. if self.downscale > 1.0:
  114. frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
  115. frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
  116. # Handle first frame
  117. if not self.initializedFirstFrame:
  118. # Initialize data
  119. self.prevFrame = frame.copy()
  120. # Initialization done
  121. self.initializedFirstFrame = True
  122. return H
  123. # Run the ECC algorithm. The results are stored in warp_matrix.
  124. # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)
  125. try:
  126. (_, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)
  127. except Exception as e:
  128. LOGGER.warning(f"WARNING: find transform failed. Set warp as identity {e}")
  129. return H
  130. def apply_features(self, raw_frame: np.array, detections: list = None) -> np.array:
  131. """
  132. Apply feature-based methods like ORB or SIFT to a raw frame.
  133. Args:
  134. raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
  135. detections (List | None): List of detections to be used in the processing.
  136. Returns:
  137. (np.ndarray): Processed frame.
  138. Examples:
  139. >>> gmc = GMC(method="orb")
  140. >>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
  141. >>> processed_frame = gmc.apply_features(raw_frame)
  142. >>> print(processed_frame.shape)
  143. (2, 3)
  144. """
  145. height, width, _ = raw_frame.shape
  146. frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
  147. H = np.eye(2, 3)
  148. # Downscale image
  149. if self.downscale > 1.0:
  150. frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
  151. width = width // self.downscale
  152. height = height // self.downscale
  153. # Find the keypoints
  154. mask = np.zeros_like(frame)
  155. mask[int(0.02 * height) : int(0.98 * height), int(0.02 * width) : int(0.98 * width)] = 255
  156. if detections is not None:
  157. for det in detections:
  158. tlbr = (det[:4] / self.downscale).astype(np.int_)
  159. mask[tlbr[1] : tlbr[3], tlbr[0] : tlbr[2]] = 0
  160. keypoints = self.detector.detect(frame, mask)
  161. # Compute the descriptors
  162. keypoints, descriptors = self.extractor.compute(frame, keypoints)
  163. # Handle first frame
  164. if not self.initializedFirstFrame:
  165. # Initialize data
  166. self.prevFrame = frame.copy()
  167. self.prevKeyPoints = copy.copy(keypoints)
  168. self.prevDescriptors = copy.copy(descriptors)
  169. # Initialization done
  170. self.initializedFirstFrame = True
  171. return H
  172. # Match descriptors
  173. knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)
  174. # Filter matches based on smallest spatial distance
  175. matches = []
  176. spatialDistances = []
  177. maxSpatialDistance = 0.25 * np.array([width, height])
  178. # Handle empty matches case
  179. if len(knnMatches) == 0:
  180. # Store to next iteration
  181. self.prevFrame = frame.copy()
  182. self.prevKeyPoints = copy.copy(keypoints)
  183. self.prevDescriptors = copy.copy(descriptors)
  184. return H
  185. for m, n in knnMatches:
  186. if m.distance < 0.9 * n.distance:
  187. prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt
  188. currKeyPointLocation = keypoints[m.trainIdx].pt
  189. spatialDistance = (
  190. prevKeyPointLocation[0] - currKeyPointLocation[0],
  191. prevKeyPointLocation[1] - currKeyPointLocation[1],
  192. )
  193. if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and (
  194. np.abs(spatialDistance[1]) < maxSpatialDistance[1]
  195. ):
  196. spatialDistances.append(spatialDistance)
  197. matches.append(m)
  198. meanSpatialDistances = np.mean(spatialDistances, 0)
  199. stdSpatialDistances = np.std(spatialDistances, 0)
  200. inliers = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances
  201. goodMatches = []
  202. prevPoints = []
  203. currPoints = []
  204. for i in range(len(matches)):
  205. if inliers[i, 0] and inliers[i, 1]:
  206. goodMatches.append(matches[i])
  207. prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)
  208. currPoints.append(keypoints[matches[i].trainIdx].pt)
  209. prevPoints = np.array(prevPoints)
  210. currPoints = np.array(currPoints)
  211. # Draw the keypoint matches on the output image
  212. # if False:
  213. # import matplotlib.pyplot as plt
  214. # matches_img = np.hstack((self.prevFrame, frame))
  215. # matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)
  216. # W = self.prevFrame.shape[1]
  217. # for m in goodMatches:
  218. # prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)
  219. # curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
  220. # curr_pt[0] += W
  221. # color = np.random.randint(0, 255, 3)
  222. # color = (int(color[0]), int(color[1]), int(color[2]))
  223. #
  224. # matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
  225. # matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)
  226. # matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)
  227. #
  228. # plt.figure()
  229. # plt.imshow(matches_img)
  230. # plt.show()
  231. # Find rigid matrix
  232. if prevPoints.shape[0] > 4:
  233. H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
  234. # Handle downscale
  235. if self.downscale > 1.0:
  236. H[0, 2] *= self.downscale
  237. H[1, 2] *= self.downscale
  238. else:
  239. LOGGER.warning("WARNING: not enough matching points")
  240. # Store to next iteration
  241. self.prevFrame = frame.copy()
  242. self.prevKeyPoints = copy.copy(keypoints)
  243. self.prevDescriptors = copy.copy(descriptors)
  244. return H
  245. def apply_sparseoptflow(self, raw_frame: np.array) -> np.array:
  246. """
  247. Apply Sparse Optical Flow method to a raw frame.
  248. Args:
  249. raw_frame (np.ndarray): The raw frame to be processed, with shape (H, W, C).
  250. Returns:
  251. (np.ndarray): Processed frame with shape (2, 3).
  252. Examples:
  253. >>> gmc = GMC()
  254. >>> result = gmc.apply_sparseoptflow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
  255. >>> print(result)
  256. [[1. 0. 0.]
  257. [0. 1. 0.]]
  258. """
  259. height, width, _ = raw_frame.shape
  260. frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
  261. H = np.eye(2, 3)
  262. # Downscale image
  263. if self.downscale > 1.0:
  264. frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))
  265. # Find the keypoints
  266. keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
  267. # Handle first frame
  268. if not self.initializedFirstFrame or self.prevKeyPoints is None:
  269. self.prevFrame = frame.copy()
  270. self.prevKeyPoints = copy.copy(keypoints)
  271. self.initializedFirstFrame = True
  272. return H
  273. # Find correspondences
  274. matchedKeypoints, status, _ = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)
  275. # Leave good correspondences only
  276. prevPoints = []
  277. currPoints = []
  278. for i in range(len(status)):
  279. if status[i]:
  280. prevPoints.append(self.prevKeyPoints[i])
  281. currPoints.append(matchedKeypoints[i])
  282. prevPoints = np.array(prevPoints)
  283. currPoints = np.array(currPoints)
  284. # Find rigid matrix
  285. if (prevPoints.shape[0] > 4) and (prevPoints.shape[0] == prevPoints.shape[0]):
  286. H, _ = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)
  287. if self.downscale > 1.0:
  288. H[0, 2] *= self.downscale
  289. H[1, 2] *= self.downscale
  290. else:
  291. LOGGER.warning("WARNING: not enough matching points")
  292. self.prevFrame = frame.copy()
  293. self.prevKeyPoints = copy.copy(keypoints)
  294. return H
  295. def reset_params(self) -> None:
  296. """Reset the internal parameters including previous frame, keypoints, and descriptors."""
  297. self.prevFrame = None
  298. self.prevKeyPoints = None
  299. self.prevDescriptors = None
  300. self.initializedFirstFrame = False