# loi_heads.py
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import torchvision
from torch import nn, Tensor

import libs.vision_libs.models.detection._utils as det_utils
from libs.vision_libs.ops import boxes as box_ops, roi_align

def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Args:
        class_logits (Tensor)
        box_regression (Tensor)
        labels (list[Tensor])
        regression_targets (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    classification_loss = F.cross_entropy(class_logits, labels)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.where(labels > 0)[0]
    labels_pos = labels[sampled_pos_inds_subset]
    N, num_classes = class_logits.shape
    box_regression = box_regression.reshape(N, box_regression.size(-1) // 4, 4)

    box_loss = F.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        beta=1 / 9,
        reduction="sum",
    )
    box_loss = box_loss / labels.numel()
    return classification_loss, box_loss

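# A minimal shape sketch for fastrcnn_loss (hypothetical values, not part of
# the original pipeline): two images with 3 and 2 sampled proposals and 5
# classes. labels and regression_targets arrive per image, as produced by
# RoIHeads.select_training_samples below.
def _demo_fastrcnn_loss():
    class_logits = torch.randn(5, 5)        # (total proposals, num_classes)
    box_regression = torch.randn(5, 5 * 4)  # (total proposals, num_classes * 4)
    labels = [torch.tensor([0, 2, 1]), torch.tensor([4, 0])]
    regression_targets = [torch.randn(3, 4), torch.randn(2, 4)]
    return fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
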
def maskrcnn_inference(x, labels):
    # type: (Tensor, List[Tensor]) -> List[Tensor]
    """
    From the results of the CNN, post process the masks
    by taking the mask corresponding to the class with max
    probability (which are of fixed size and directly output
    by the CNN) and return the masks in the mask field of the BoxList.

    Args:
        x (Tensor): the mask logits
        labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image

    Returns:
        results (list[BoxList]): one BoxList for each image, containing
            the extra field mask
    """
    mask_prob = x.sigmoid()

    # select masks corresponding to the predicted classes
    num_masks = x.shape[0]
    boxes_per_image = [label.shape[0] for label in labels]
    labels = torch.cat(labels)
    index = torch.arange(num_masks, device=labels.device)
    mask_prob = mask_prob[index, labels][:, None]
    mask_prob = mask_prob.split(boxes_per_image, dim=0)
    return mask_prob

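# Shape sketch for maskrcnn_inference (hypothetical sizes): 3 detections
# spread over two images, 28x28 mask logits for 4 classes; the result is one
# (num_boxes, 1, 28, 28) probability tensor per image.
def _demo_maskrcnn_inference():
    x = torch.randn(3, 4, 28, 28)
    labels = [torch.tensor([1, 3]), torch.tensor([2])]
    return maskrcnn_inference(x, labels)
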
def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
    # type: (Tensor, Tensor, Tensor, int) -> Tensor
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.
    """
    matched_idxs = matched_idxs.to(boxes)
    rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
    gt_masks = gt_masks[:, None].to(rois)
    return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0]

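# Hypothetical example of projecting two 32x32 GT masks onto their matched
# boxes at a 28x28 discretization, as maskrcnn_loss below does per image;
# this assumes libs.vision_libs.ops.roi_align follows the torchvision
# roi_align signature. The result has shape (2, 28, 28).
def _demo_project_masks_on_boxes():
    gt_masks = torch.rand(2, 32, 32)
    boxes = torch.tensor([[0.0, 0.0, 16.0, 16.0], [8.0, 8.0, 30.0, 30.0]])
    matched_idxs = torch.tensor([0, 1])
    return project_masks_on_boxes(gt_masks, boxes, matched_idxs, 28)
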
def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    """
    Args:
        mask_logits (Tensor)
        proposals (list[BoxList])
        gt_masks (list[Tensor])
        gt_labels (list[Tensor])
        mask_matched_idxs (list[Tensor])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    discretization_size = mask_logits.shape[-1]
    labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
    mask_targets = [
        project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
    ]

    labels = torch.cat(labels, dim=0)
    mask_targets = torch.cat(mask_targets, dim=0)

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    mask_loss = F.binary_cross_entropy_with_logits(
        mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets
    )
    return mask_loss

def line_points_to_heatmap(keypoints, rois, heatmap_size):
    # type: (Tensor, Tensor, int) -> Tensor
    print(f'rois:{rois.shape}')
    print(f'heatmap_size:{heatmap_size}')
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    print(f'keypoints.shape:{keypoints.shape}')
    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()
    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()

    gs_heatmap = generate_gaussian_heatmaps(x, y, heatmap_size, 1.0)
    print(f'gs_heatmap:{gs_heatmap.shape}')
    # kept for reference: the linear-index encoding used by keypoints_to_heatmap
    # lin_ind = y * heatmap_size + x
    # heatmaps = lin_ind * valid
    return gs_heatmap

def generate_gaussian_heatmaps(xs, ys, heatmap_size, sigma=2.0, device='cuda'):
    """
    Generate and merge Gaussian heatmaps for a set of point pairs.

    Args:
        xs (Tensor): x coordinates of all points, shape (N, 2)
        ys (Tensor): y coordinates of all points, shape (N, 2)
        heatmap_size (int): heatmap size, H == W
        sigma (float): standard deviation of the Gaussian kernel
        device (str): device type ('cpu' or 'cuda')

    Returns:
        Tensor: merged heatmaps of shape (N, H, W)
    """
    assert xs.shape == ys.shape, "x and y must have the same shape"
    N = xs.shape[0]
    print(f'N:{N}')

    # build the coordinate grid
    grid_y, grid_x = torch.meshgrid(
        torch.arange(heatmap_size, device=device),
        torch.arange(heatmap_size, device=device),
        indexing='ij'
    )

    # initialize the output heatmaps
    combined_heatmap = torch.zeros((N, heatmap_size, heatmap_size), device=device)
    for i in range(N):
        mu_x1 = xs[i, 0].clamp(0, heatmap_size - 1).item()
        mu_y1 = ys[i, 0].clamp(0, heatmap_size - 1).item()
        # squared distance to the first endpoint
        dist1 = (grid_x - mu_x1) ** 2 + (grid_y - mu_y1) ** 2
        # Gaussian around the first endpoint
        heatmap1 = torch.exp(-dist1 / (2 * sigma ** 2))

        mu_x2 = xs[i, 1].clamp(0, heatmap_size - 1).item()
        mu_y2 = ys[i, 1].clamp(0, heatmap_size - 1).item()
        # squared distance to the second endpoint
        dist2 = (grid_x - mu_x2) ** 2 + (grid_y - mu_y2) ** 2
        # Gaussian around the second endpoint
        heatmap2 = torch.exp(-dist2 / (2 * sigma ** 2))

        # merge the two endpoint heatmaps for this line
        combined_heatmap[i] = heatmap1 + heatmap2
    return combined_heatmap

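# Minimal sketch of generate_gaussian_heatmaps on the CPU (hypothetical
# coordinates): one line whose endpoints are (10, 12) and (40, 45) rendered
# onto a 56x56 heatmap; the result has shape (1, 56, 56) with two Gaussian
# peaks.
def _demo_generate_gaussian_heatmaps():
    xs = torch.tensor([[10, 40]])
    ys = torch.tensor([[12, 45]])
    return generate_gaussian_heatmaps(xs, ys, 56, sigma=1.0, device='cpu')
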
# helper to display a heatmap
def show_heatmap(heatmap, title="Heatmap"):
    """
    Display a heatmap with matplotlib.

    Args:
        heatmap (Tensor): heatmap tensor to display
        title (str): plot title
    """
    # if the tensor lives on the GPU, move it to the CPU first and
    # convert it to a numpy array
    if heatmap.is_cuda:
        heatmap = heatmap.cpu().numpy()
    else:
        heatmap = heatmap.numpy()
    plt.imshow(heatmap, cmap='hot', interpolation='nearest')
    plt.colorbar()
    plt.title(title)
    plt.show()

def keypoints_to_heatmap(keypoints, rois, heatmap_size):
    # type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()
    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()

    lin_ind = y * heatmap_size + x
    heatmaps = lin_ind * valid
    return heatmaps, valid

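# Sketch of keypoints_to_heatmap (hypothetical values): one ROI covering
# (0, 0)-(56, 56) and two keypoints given as (x, y, vis). The second point
# falls outside the heatmap, so its validity flag is 0.
def _demo_keypoints_to_heatmap():
    keypoints = torch.tensor([[[5.0, 5.0, 1.0], [100.0, 5.0, 1.0]]])
    rois = torch.tensor([[0.0, 0.0, 56.0, 56.0]])
    heatmaps, valid = keypoints_to_heatmap(keypoints, rois, 56)
    return heatmaps, valid  # valid == tensor([[1, 0]])
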
def _onnx_heatmaps_to_keypoints(
    maps, maps_i, roi_map_width, roi_map_height, widths_i, heights_i, offset_x_i, offset_y_i
):
    num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64)
    width_correction = widths_i / roi_map_width
    height_correction = heights_i / roi_map_height
    roi_map = F.interpolate(
        maps_i[:, None], size=(int(roi_map_height), int(roi_map_width)), mode="bicubic", align_corners=False
    )[:, 0]
    w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64)
    pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
    x_int = pos % w
    y_int = (pos - x_int) // w
    x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * width_correction.to(
        dtype=torch.float32
    )
    y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * height_correction.to(
        dtype=torch.float32
    )
    xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32)
    xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32)
    xy_preds_i_2 = torch.ones(xy_preds_i_1.shape, dtype=torch.float32)
    xy_preds_i = torch.stack(
        [
            xy_preds_i_0.to(dtype=torch.float32),
            xy_preds_i_1.to(dtype=torch.float32),
            xy_preds_i_2.to(dtype=torch.float32),
        ],
        0,
    )
    # TODO: simplify when indexing without rank will be supported by ONNX
    base = num_keypoints * num_keypoints + num_keypoints + 1
    ind = torch.arange(num_keypoints)
    ind = ind.to(dtype=torch.int64) * base
    end_scores_i = (
        roi_map.index_select(1, y_int.to(dtype=torch.int64))
        .index_select(2, x_int.to(dtype=torch.int64))
        .view(-1)
        .index_select(0, ind.to(dtype=torch.int64))
    )
    return xy_preds_i, end_scores_i

@torch.jit._script_if_tracing
def _onnx_heatmaps_to_keypoints_loop(
    maps, rois, widths_ceil, heights_ceil, widths, heights, offset_x, offset_y, num_keypoints
):
    xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device)
    for i in range(int(rois.size(0))):
        xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(
            maps, maps[i], widths_ceil[i], heights_ceil[i], widths[i], heights[i], offset_x[i], offset_y[i]
        )
        xy_preds = torch.cat((xy_preds.to(dtype=torch.float32), xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0)
        end_scores = torch.cat(
            (end_scores.to(dtype=torch.float32), end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0
        )
    return xy_preds, end_scores

def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps. Output has shape
    (#rois, #keypoints, 3), the last dimension holding (x, y, visibility),
    together with per-keypoint scores of shape (#rois, #keypoints).
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = widths.clamp(min=1)
    heights = heights.clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()
    num_keypoints = maps.shape[1]

    if torchvision._is_tracing():
        xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(
            maps,
            rois,
            widths_ceil,
            heights_ceil,
            widths,
            heights,
            offset_x,
            offset_y,
            torch.scalar_tensor(num_keypoints, dtype=torch.int64),
        )
        return xy_preds.permute(0, 2, 1), end_scores

    xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device)
    for i in range(len(rois)):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = F.interpolate(
            maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
        )[:, 0]
        # roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
        x_int = pos % w
        y_int = torch.div(pos - x_int, w, rounding_mode="floor")
        # assert (roi_map_probs[k, y_int, x_int] ==
        #         roi_map_probs[k, :, :].max())
        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(num_keypoints, device=roi_map.device), y_int, x_int]
    return xy_preds.permute(0, 2, 1), end_scores

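# Shape sketch for heatmaps_to_keypoints (hypothetical sizes): 2 ROIs and 17
# keypoint heatmaps of size 56x56 give xy_preds of shape (2, 17, 3) and
# scores of shape (2, 17).
def _demo_heatmaps_to_keypoints():
    maps = torch.rand(2, 17, 56, 56)
    rois = torch.tensor([[0.0, 0.0, 32.0, 48.0], [10.0, 10.0, 40.0, 30.0]])
    xy_preds, end_scores = heatmaps_to_keypoints(maps, rois)
    return xy_preds.shape, end_scores.shape
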
def non_maximum_suppression(a):
    # keep only responses that are the maximum of their 3x3 neighbourhood
    ap = F.max_pool2d(a, 3, stride=1, padding=1)
    mask = (a == ap).float().clamp(min=0.0)
    return a * mask

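# Quick sketch of the heatmap NMS above (hypothetical values): only local
# maxima of each 3x3 neighbourhood survive, everything else is zeroed.
def _demo_non_maximum_suppression():
    a = torch.zeros(1, 1, 5, 5)
    a[0, 0, 2, 2] = 1.0  # local peak, survives
    a[0, 0, 2, 3] = 0.5  # suppressed by the neighbouring peak
    return non_maximum_suppression(a)
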
def heatmaps_to_lines(maps, rois):
    """Extract the two predicted line endpoints from heatmaps. Output has
    shape (#rois, 2, 3), the last dimension holding (x, y, visibility) per
    endpoint, together with the endpoint scores of shape (#rois, 2).
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = widths.clamp(min=1)
    heights = heights.clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()

    xy_preds = torch.zeros((len(rois), 3, 2), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((len(rois), 2), dtype=torch.float32, device=maps.device)
    for i in range(len(rois)):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = F.interpolate(
            maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
        )[:, 0]
        print(f'roi_map:{roi_map.shape}')
        w = roi_map.shape[2]
        # suppress non-peak responses, then take the two strongest peaks as
        # the line endpoints
        flatten_map = non_maximum_suppression(roi_map).reshape(1, -1)
        score, index = torch.topk(flatten_map, k=2)
        print(f'index:{index}')
        pos = index
        x_int = pos % w
        y_int = torch.div(pos - x_int, w, rounding_mode="floor")
        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(1, device=roi_map.device), y_int, x_int]
    return xy_preds.permute(0, 2, 1), end_scores

def lines_point_pair_loss(line_logits, proposals, gt_lines, line_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    N, K, H, W = line_logits.shape
    len_proposals = len(proposals)
    print(f'lines_point_pair_loss line_logits.shape:{line_logits.shape},len_proposals:{len_proposals}')
    if H != W:
        raise ValueError(
            f"line_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
        )
    discretization_size = H

    gs_heatmaps = []
    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_lines, line_matched_idxs):
        print(f'proposals_per_image:{proposals_per_image.shape}')
        kp = gt_kp_in_image[midx]
        gs_heatmaps_per_img = line_points_to_heatmap(kp, proposals_per_image, discretization_size)
        gs_heatmaps.append(gs_heatmaps_per_img)

    gs_heatmaps = torch.cat(gs_heatmaps, dim=0)
    print(f'gs_heatmaps:{gs_heatmaps.shape}, line_logits.shape:{line_logits.squeeze(1).shape}')

    # unlike keypointrcnn_loss below, the target here is a dense Gaussian
    # heatmap rather than a single linear index per keypoint, so the logits
    # are compared against the soft target directly
    line_logits = line_logits.squeeze(1)
    line_loss = F.cross_entropy(line_logits, gs_heatmaps)
    return line_loss

def is_collinear(p1, p2, q1, q2, eps=1e-6):
    # both endpoints of the second segment must lie on the line through
    # p1-p2 for the two segments to be collinear
    v1 = p2 - p1
    v2 = q1 - p1
    v3 = q2 - p1
    cross_z1 = v1[0] * v2[1] - v1[1] * v2[0]
    cross_z2 = v1[0] * v3[1] - v1[1] * v3[0]
    return abs(cross_z1) < eps and abs(cross_z2) < eps

def segment_intersection_length(line1, line2):
    p1, p2 = line1
    q1, q2 = line2
    if not is_collinear(p1, p2, q1, q2):
        return 0.0
    dir_vec = p2 - p1
    if torch.norm(dir_vec) == 0:
        return 0.0

    # parameterize points along p1 -> p2 so that t(p1) = 0 and t(p2) = 1;
    # the dot product must be normalized by |dir_vec|^2 for this to hold
    def project(point):
        return torch.dot(point - p1, dir_vec) / torch.dot(dir_vec, dir_vec)

    t_p1 = 0.0
    t_p2 = 1.0
    t_q1 = project(q1)
    t_q2 = project(q2)
    t_min = max(t_p1, min(t_q1, t_q2))
    t_max = min(t_p2, max(t_q1, t_q2))
    if t_min >= t_max:
        return 0.0
    length = torch.norm(dir_vec) * (t_max - t_min)
    return length.item()

def line_iou(pred_line, target_line):
    pred_line_coords = pred_line[:, :2]
    target_line_coords = target_line[:, :2]
    l1_len = torch.norm(pred_line_coords[1] - pred_line_coords[0])
    l2_len = torch.norm(target_line_coords[1] - target_line_coords[0])
    inter_len = segment_intersection_length(pred_line_coords, target_line_coords)
    union_len = l1_len + l2_len - inter_len
    if union_len <= 0:
        return 0.0
    return inter_len / union_len

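# Worked example for line_iou (hypothetical endpoints; rows are the two
# endpoints and the first two columns are x, y): segments (0,0)-(4,0) and
# (2,0)-(6,0) are collinear with overlap 2 and union 4 + 4 - 2 = 6, so the
# IoU is 1/3.
def _demo_line_iou():
    pred = torch.tensor([[0.0, 0.0, 1.0], [4.0, 0.0, 1.0]])
    target = torch.tensor([[2.0, 0.0, 1.0], [6.0, 0.0, 1.0]])
    return line_iou(pred, target)
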
def line_iou_loss(x, boxes, gt_lines, matched_idx):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Optional[Tensor]
    points_probs = []
    points_scores = []
    losses = []
    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)
    for xx, bb, gt_line, mid in zip(x2, boxes, gt_lines, matched_idx):
        p_prob, scores = heatmaps_to_lines(xx, bb)
        points_probs.append(p_prob)
        points_scores.append(scores)
        gt_line_points = gt_line[mid]
        print(f'gt_line_points:{gt_line_points.shape}')
        # match predicted and ground-truth segments (e.g. with Hungarian
        # matching); here a one-to-one matching by index is assumed
        pred_lines = p_prob  # shape: (num_pred_lines, 2, 3)
        print(f'pred_lines:{pred_lines.shape}')
        for j in range(min(len(pred_lines), len(gt_line_points))):
            iou = line_iou(pred_lines[j], gt_line_points[j])
            # line_iou returns a plain float for degenerate cases, so wrap it
            # to keep torch.stack below happy
            if not torch.is_tensor(iou):
                iou = torch.tensor(iou, device=x.device)
            losses.append(1.0 - iou)  # loss is 1 - IoU
    total_loss = torch.mean(torch.stack(losses)) if losses else None
    return total_loss

def line_inference(x, boxes):
    # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    points_probs = []
    points_scores = []
    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)
    for xx, bb in zip(x2, boxes):
        p_prob, scores = heatmaps_to_lines(xx, bb)
        points_probs.append(p_prob)
        points_scores.append(scores)
    return points_probs, points_scores

def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    N, K, H, W = keypoint_logits.shape
    if H != W:
        raise ValueError(
            f"keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
        )
    discretization_size = H
    heatmaps = []
    valid = []
    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs):
        kp = gt_kp_in_image[midx]
        heatmaps_per_image, valid_per_image = keypoints_to_heatmap(kp, proposals_per_image, discretization_size)
        heatmaps.append(heatmaps_per_image.view(-1))
        valid.append(valid_per_image.view(-1))
    keypoint_targets = torch.cat(heatmaps, dim=0)
    valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
    valid = torch.where(valid)[0]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0

    keypoint_logits = keypoint_logits.view(N * K, H * W)
    keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
    return keypoint_loss

def keypointrcnn_inference(x, boxes):
    # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    kp_probs = []
    kp_scores = []
    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)
    for xx, bb in zip(x2, boxes):
        kp_prob, scores = heatmaps_to_keypoints(xx, bb)
        kp_probs.append(kp_prob)
        kp_scores.append(scores)
    return kp_probs, kp_scores

def _onnx_expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half = w_half.to(dtype=torch.float32) * scale
    h_half = h_half.to(dtype=torch.float32) * scale

    boxes_exp0 = x_c - w_half
    boxes_exp1 = y_c - h_half
    boxes_exp2 = x_c + w_half
    boxes_exp3 = y_c + h_half
    boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
    return boxes_exp

# the next two functions should be merged inside Masker
# but are kept here for the moment while we need them
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    if torchvision._is_tracing():
        return _onnx_expand_boxes(boxes, scale)
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half *= scale
    h_half *= scale

    boxes_exp = torch.zeros_like(boxes)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp

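# Sketch for expand_boxes (hypothetical box): scaling a 10x10 box centred at
# (10, 10) by 1.2 grows each side to 12 while keeping the centre fixed,
# giving [4, 4, 16, 16].
def _demo_expand_boxes():
    boxes = torch.tensor([[5.0, 5.0, 15.0, 15.0]])
    return expand_boxes(boxes, 1.2)
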
@torch.jit.unused
def expand_masks_tracing_scale(M, padding):
    # type: (int, int) -> float
    return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)


def expand_masks(mask, padding):
    # type: (Tensor, int) -> Tuple[Tensor, float]
    M = mask.shape[-1]
    if torch._C._get_tracing_state():  # could not import is_tracing(), not sure why
        scale = expand_masks_tracing_scale(M, padding)
    else:
        scale = float(M + 2 * padding) / M
    padded_mask = F.pad(mask, (padding,) * 4)
    return padded_mask, scale

def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0])]
    return im_mask

def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
    one = torch.ones(1, dtype=torch.int64)
    zero = torch.zeros(1, dtype=torch.int64)

    w = box[2] - box[0] + one
    h = box[3] - box[1] + one
    w = torch.max(torch.cat((w, one)))
    h = torch.max(torch.cat((h, one)))

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, mask.size(0), mask.size(1)))

    # Resize mask
    mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
    x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
    y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
    y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))

    unpadded_im_mask = mask[(y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0])]

    # TODO: replace below with a dynamic padding when support is added in ONNX
    # pad y
    zeros_y0 = torch.zeros(y_0, unpadded_im_mask.size(1))
    zeros_y1 = torch.zeros(im_h - y_1, unpadded_im_mask.size(1))
    concat_0 = torch.cat((zeros_y0, unpadded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]

    # pad x
    zeros_x0 = torch.zeros(concat_0.size(0), x_0)
    zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
    im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]
    return im_mask

@torch.jit._script_if_tracing
def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):
    res_append = torch.zeros(0, im_h, im_w)
    for i in range(masks.size(0)):
        mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
        mask_res = mask_res.unsqueeze(0)
        res_append = torch.cat((res_append, mask_res))
    return res_append

def paste_masks_in_image(masks, boxes, img_shape, padding=1):
    # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
    masks, scale = expand_masks(masks, padding=padding)
    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
    im_h, im_w = img_shape

    if torchvision._is_tracing():
        return _onnx_paste_masks_in_image_loop(
            masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)
        )[:, None]
    res = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]
    if len(res) > 0:
        ret = torch.stack(res, dim=0)[:, None]
    else:
        ret = masks.new_empty((0, 1, im_h, im_w))
    return ret

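# Shape sketch for paste_masks_in_image (hypothetical mask and box): one
# 28x28 mask pasted into a 100x120 image canvas yields a (1, 1, 100, 120)
# tensor.
def _demo_paste_masks_in_image():
    masks = torch.rand(1, 1, 28, 28)
    boxes = torch.tensor([[10.0, 20.0, 50.0, 60.0]])
    return paste_masks_in_image(masks, boxes, (100, 120))
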
class RoIHeads(nn.Module):
    __annotations__ = {
        "box_coder": det_utils.BoxCoder,
        "proposal_matcher": det_utils.Matcher,
        "fg_bg_sampler": det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Line
        line_roi_pool=None,
        line_head=None,
        line_predictor=None,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        # Keypoint
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
    ):
        super().__init__()

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        self.line_roi_pool = line_roi_pool
        self.line_head = line_head
        self.line_predictor = line_predictor

        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

    def has_mask(self):
        if self.mask_roi_pool is None:
            return False
        if self.mask_head is None:
            return False
        if self.mask_predictor is None:
            return False
        return True

    def has_keypoint(self):
        if self.keypoint_roi_pool is None:
            return False
        if self.keypoint_head is None:
            return False
        if self.keypoint_predictor is None:
            return False
        return True

    def has_line(self):
        if self.line_roi_pool is None:
            return False
        if self.line_head is None:
            return False
        if self.line_predictor is None:
            return False
        return True

    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        matched_idxs = []
        labels = []
        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
            if gt_boxes_in_image.numel() == 0:
                # Background image
                device = proposals_in_image.device
                clamped_matched_idxs_in_image = torch.zeros(
                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device
                )
                labels_in_image = torch.zeros((proposals_in_image.shape[0],), dtype=torch.int64, device=device)
            else:
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)
                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)

                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)

                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
                labels_in_image = labels_in_image.to(dtype=torch.int64)

                # Label background (below the low threshold)
                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD
                labels_in_image[bg_inds] = 0

                # Label ignore proposals (between low and high thresholds)
                ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS
                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler

            matched_idxs.append(clamped_matched_idxs_in_image)
            labels.append(labels_in_image)
        return matched_idxs, labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_inds = []
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):
            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]
            sampled_inds.append(img_sampled_inds)
        return sampled_inds

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        proposals = [torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes)]
        return proposals

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        if targets is None:
            raise ValueError("targets should not be None")
        if not all(["boxes" in t for t in targets]):
            raise ValueError("Every element of targets should have a boxes key")
        if not all(["labels" in t for t in targets]):
            raise ValueError("Every element of targets should have a labels key")
        if self.has_mask():
            if not all(["masks" in t for t in targets]):
                raise ValueError("Every element of targets should have a masks key")

    def select_training_samples(
        self,
        proposals,  # type: List[Tensor]
        targets,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        self.check_targets(targets)
        if targets is None:
            raise ValueError("targets should not be None")
        dtype = proposals[0].dtype
        device = proposals[0].device

        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["labels"] for t in targets]

        # append ground-truth bboxes to proposals
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # get matching gt indices for each proposal
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        # sample a fixed proportion of positive-negative proposals
        sampled_inds = self.subsample(labels)
        matched_gt_boxes = []
        num_images = len(proposals)
        for img_id in range(num_images):
            img_sampled_inds = sampled_inds[img_id]
            proposals[img_id] = proposals[img_id][img_sampled_inds]
            labels[img_id] = labels[img_id][img_sampled_inds]
            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]

            gt_boxes_in_image = gt_boxes[img_id]
            if gt_boxes_in_image.numel() == 0:
                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])

        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, matched_idxs, labels, regression_targets

    def postprocess_detections(
        self,
        class_logits,  # type: Tensor
        box_regression,  # type: Tensor
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[: self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels

    def forward(
        self,
        features,  # type: Dict[str, Tensor]
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
        targets=None,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Args:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        print(f'roihead forward!!!')
        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                if not t["boxes"].dtype in floating_point_types:
                    raise TypeError(f"target boxes must be of float type, instead got {t['boxes'].dtype}")
                if not t["labels"].dtype == torch.int64:
                    raise TypeError(f"target labels must be of int64 type, instead got {t['labels'].dtype}")
                if self.has_keypoint():
                    if not t["keypoints"].dtype == torch.float32:
                        raise TypeError(f"target keypoints must be of float type, instead got {t['keypoints'].dtype}")

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            # targets may also be supplied at eval time so that validation
            # losses can be computed alongside the detections
            if targets is not None:
                proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
            else:
                labels = None
                regression_targets = None
                matched_idxs = None

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        # _, C, H, W = features['0'].shape  # ignore batch_size; only C, H, W matter here
        if self.training:
            if labels is None:
                raise ValueError("labels cannot be None")
            if regression_targets is None:
                raise ValueError("regression_targets cannot be None")
            print(f'boxes compute losses')
            loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
            losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
        else:
            if targets is not None:
                loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
                losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )

        if self.has_line():
            print(f'roi_heads forward has_line()!!!!')
            line_proposals = [p["boxes"] for p in result]
            print(f'boxes_proposals:{len(line_proposals)}')
            # if line_proposals is None or len(line_proposals) == 0:
            #     # return empty features or skip this branch entirely
            #     return torch.empty(0, C, H, W).to(features['0'].device)
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                print(f'num_images:{num_images}')
                line_proposals = []
                pos_matched_idxs = []
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    line_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                if targets is not None:
                    # validation with targets: select positive boxes as in training
                    pos_matched_idxs = []
                    num_images = len(proposals)
                    line_proposals = []
                    print(f'val num_images:{num_images}')
                    if matched_idxs is None:
                        raise ValueError("if targets are given, matched_idxs should not be None")
                    for img_id in range(num_images):
                        pos = torch.where(labels[img_id] > 0)[0]
                        line_proposals.append(proposals[img_id][pos])
                        pos_matched_idxs.append(matched_idxs[img_id][pos])
                else:
                    pos_matched_idxs = None

            print(f'line_proposals:{len(line_proposals)}')
            line_features = self.line_roi_pool(features, line_proposals, image_shapes)
            print(f'line_features from line_roi_pool:{line_features.shape}')
            line_features = self.line_head(line_features)
            print(f'line_features from line_head:{line_features.shape}')
            line_logits = self.line_predictor(line_features)
            print(f'line_logits:{line_logits.shape}')

            loss_line = {}
            loss_line_iou = {}
            if self.training:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")
                gt_lines = [t["lines"] for t in targets]
                rcnn_loss_line = lines_point_pair_loss(
                    line_logits, line_proposals, gt_lines, pos_matched_idxs
                )
                iou_loss = line_iou_loss(line_logits, line_proposals, gt_lines, pos_matched_idxs)
                loss_line = {"loss_line": rcnn_loss_line}
                loss_line_iou = {'loss_line_iou': iou_loss}
            else:
                if targets is not None:
                    gt_lines = [t["lines"] for t in targets]
                    rcnn_loss_line = lines_point_pair_loss(
                        line_logits, line_proposals, gt_lines, pos_matched_idxs
                    )
                    loss_line = {"loss_line": rcnn_loss_line}
                    iou_loss = line_iou_loss(line_logits, line_proposals, gt_lines, pos_matched_idxs)
                    loss_line_iou = {'loss_line_iou': iou_loss}
                else:
                    if line_logits is None or line_proposals is None:
                        raise ValueError(
                            "both line_logits and line_proposals should not be None when not in training mode"
                        )
                    lines_probs, line_scores = line_inference(line_logits, line_proposals)
                    for line_prob, ls, r in zip(lines_probs, line_scores, result):
                        r["lines"] = line_prob
                        r["lines_scores"] = ls
            losses.update(loss_line)
            losses.update(loss_line_iou)

        if self.has_mask():
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")
                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.mask_roi_pool is not None:
                mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
                mask_features = self.mask_head(mask_features)
                mask_logits = self.mask_predictor(mask_features)
            else:
                raise Exception("Expected mask_roi_pool to be not None")

            loss_mask = {}
            if self.training:
                if targets is None or pos_matched_idxs is None or mask_logits is None:
                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")
                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob
            losses.update(loss_mask)

        # keep none checks in if conditional so torchscript will conditionally
        # compile each branch
        if self.has_keypoint():
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            # use the keypoint-specific modules here (guaranteed not None by
            # has_keypoint()); the line modules have their own branch above
            keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")
                gt_keypoints = [t["keypoints"] for t in targets]
                rcnn_loss_keypoint = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs
                )
                loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
            else:
                if keypoint_logits is None or keypoint_proposals is None:
                    raise ValueError(
                        "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
                    )
                keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps
            losses.update(loss_keypoint)

        return result, losses