# loi_heads.py
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import torchvision
from scipy.optimize import linear_sum_assignment
from torch import nn, Tensor

import libs.vision_libs.models.detection._utils as det_utils
from libs.vision_libs.ops import boxes as box_ops, roi_align

def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Args:
        class_logits (Tensor): predicted class scores, one row per proposal
        box_regression (Tensor): predicted box deltas, one row per proposal
        labels (list[Tensor]): per-image ground-truth class labels
        regression_targets (list[Tensor]): per-image box regression targets

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.where(labels > 0)[0]
    labels_pos = labels[sampled_pos_inds_subset]
    N, num_classes = class_logits.shape
    box_regression = box_regression.reshape(N, box_regression.size(-1) // 4, 4)

    box_loss = F.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        beta=1 / 9,
        reduction="sum",
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss

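# Illustrative sketch (a hypothetical `_demo_fastrcnn_loss` helper, not used by
# the model): with 2 images, 4 sampled proposals each, and 3 classes, the
# inputs to fastrcnn_loss have these shapes.
def _demo_fastrcnn_loss():
    class_logits = torch.randn(8, 3)         # (total proposals, num_classes)
    box_regression = torch.randn(8, 3 * 4)   # (total proposals, num_classes * 4)
    labels = [torch.tensor([0, 1, 2, 0]), torch.tensor([1, 0, 0, 2])]
    regression_targets = [torch.randn(4, 4), torch.randn(4, 4)]
    return fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
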
def maskrcnn_inference(x, labels):
    # type: (Tensor, List[Tensor]) -> List[Tensor]
    """
    From the results of the CNN, post process the masks
    by taking the mask corresponding to the class with max
    probability (which are of fixed size and directly output
    by the CNN).

    Args:
        x (Tensor): the mask logits
        labels (list[Tensor]): predicted labels, one tensor per image,
            used to select the mask channel for each box

    Returns:
        results (list[Tensor]): one tensor of per-box mask probabilities
            for each image
    """
    mask_prob = x.sigmoid()

    # select masks corresponding to the predicted classes
    num_masks = x.shape[0]
    boxes_per_image = [label.shape[0] for label in labels]
    labels = torch.cat(labels)
    index = torch.arange(num_masks, device=labels.device)
    mask_prob = mask_prob[index, labels][:, None]
    mask_prob = mask_prob.split(boxes_per_image, dim=0)
    return mask_prob

def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
    # type: (Tensor, Tensor, Tensor, int) -> Tensor
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.
    """
    matched_idxs = matched_idxs.to(boxes)
    rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
    gt_masks = gt_masks[:, None].to(rois)
    return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0]

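# Illustrative sketch (hypothetical values): roi_align expects rois as
# (index, x1, y1, x2, y2); project_masks_on_boxes builds that by pairing each
# proposal box with the index of its matched GT mask.
def _demo_project_masks_on_boxes():
    gt_masks = torch.zeros(2, 64, 64)                 # two GT masks
    gt_masks[0, 10:30, 10:30] = 1.0
    boxes = torch.tensor([[8.0, 8.0, 32.0, 32.0]])    # one proposal box
    matched_idxs = torch.tensor([0])                  # proposal matched to mask 0
    targets = project_masks_on_boxes(gt_masks, boxes, matched_idxs, M=28)
    return targets.shape                              # torch.Size([1, 28, 28])
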
def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    """
    Args:
        mask_logits (Tensor): predicted mask logits, one channel per class
        proposals (list[Tensor]): per-image proposal boxes
        gt_masks (list[Tensor]): per-image ground-truth masks
        gt_labels (list[Tensor]): per-image ground-truth labels
        mask_matched_idxs (list[Tensor]): per-image matched GT indices

    Returns:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    discretization_size = mask_logits.shape[-1]
    labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
    mask_targets = [
        project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
    ]

    labels = torch.cat(labels, dim=0)
    mask_targets = torch.cat(mask_targets, dim=0)

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    mask_loss = F.binary_cross_entropy_with_logits(
        mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets
    )
    return mask_loss

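# Illustrative sketch (hypothetical `_demo_maskrcnn_loss` helper):
# mask_logits[arange(N), labels] selects, for each proposal, the mask channel
# of its ground-truth class before the per-pixel BCE.
def _demo_maskrcnn_loss():
    mask_logits = torch.randn(1, 3, 28, 28)           # one proposal, 3 classes
    proposals = [torch.tensor([[0.0, 0.0, 32.0, 32.0]])]
    gt_masks = [torch.zeros(1, 64, 64)]
    gt_labels = [torch.tensor([2])]
    matched_idxs = [torch.tensor([0])]
    return maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, matched_idxs)
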
def line_points_to_heatmap(keypoints, rois, heatmap_size):
    # type: (Tensor, Tensor, int) -> Tensor
    """
    Maps the two endpoints of each GT line into ROI-local heatmap coordinates
    and renders them as Gaussian target heatmaps of shape (N, H, W).
    """
    print(f'rois:{rois.shape}')
    print(f'heatmap_size:{heatmap_size}')
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    print(f'keypoints.shape:{keypoints.shape}')
    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()
    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()  # currently unused; kept for parity with keypoints_to_heatmap

    gs_heatmap = generate_gaussian_heatmaps(x, y, heatmap_size, sigma=1.0)
    print(f'gs_heatmap:{gs_heatmap.shape}')
    return gs_heatmap

def generate_gaussian_heatmaps(xs, ys, heatmap_size, sigma=2.0, device=None):
    """
    Generate and merge Gaussian heatmaps for a set of line endpoint pairs.

    Args:
        xs (Tensor): x coordinates of shape (N, 2), one endpoint pair per line
        ys (Tensor): y coordinates of shape (N, 2)
        heatmap_size (int): heatmap size, H = W
        sigma (float): standard deviation of the Gaussian kernel
        device (optional): device to build the heatmaps on; defaults to
            the device of `xs`

    Returns:
        Tensor: merged heatmaps of shape (N, H, W)
    """
    assert xs.shape == ys.shape, "x and y must have the same shape"
    if device is None:
        device = xs.device
    N = xs.shape[0]
    print(f'N:{N}')

    # build the coordinate grid
    grid_y, grid_x = torch.meshgrid(
        torch.arange(heatmap_size, device=device),
        torch.arange(heatmap_size, device=device),
        indexing='ij'
    )

    # initialize the output heatmaps
    combined_heatmap = torch.zeros((N, heatmap_size, heatmap_size), device=device)
    for i in range(N):
        mu_x1 = xs[i, 0].clamp(0, heatmap_size - 1).item()
        mu_y1 = ys[i, 0].clamp(0, heatmap_size - 1).item()
        # squared distance to the first endpoint
        dist1 = (grid_x - mu_x1) ** 2 + (grid_y - mu_y1) ** 2
        # Gaussian centred on the first endpoint
        heatmap1 = torch.exp(-dist1 / (2 * sigma ** 2))

        mu_x2 = xs[i, 1].clamp(0, heatmap_size - 1).item()
        mu_y2 = ys[i, 1].clamp(0, heatmap_size - 1).item()
        # squared distance to the second endpoint
        dist2 = (grid_x - mu_x2) ** 2 + (grid_y - mu_y2) ** 2
        # Gaussian centred on the second endpoint
        heatmap2 = torch.exp(-dist2 / (2 * sigma ** 2))

        # merge the two endpoint Gaussians into one per-line heatmap
        combined_heatmap[i] = heatmap1 + heatmap2

    return combined_heatmap

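# Hypothetical vectorized alternative (a sketch, not used by the code above):
# the Python loop in generate_gaussian_heatmaps can be replaced by one
# broadcasted expression over all N endpoint pairs.
def _gaussian_heatmaps_vectorized(xs, ys, heatmap_size, sigma=2.0):
    device = xs.device
    grid_y, grid_x = torch.meshgrid(
        torch.arange(heatmap_size, device=device),
        torch.arange(heatmap_size, device=device),
        indexing="ij",
    )
    mu_x = xs.clamp(0, heatmap_size - 1).float()  # (N, 2)
    mu_y = ys.clamp(0, heatmap_size - 1).float()  # (N, 2)
    # (N, 2, H, W) squared distances to each endpoint, then sum the two Gaussians
    dist = (grid_x - mu_x[..., None, None]) ** 2 + (grid_y - mu_y[..., None, None]) ** 2
    return torch.exp(-dist / (2 * sigma ** 2)).sum(dim=1)  # (N, H, W)
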
# helper for visualizing heatmaps
def show_heatmap(heatmap, title="Heatmap"):
    """
    Display a heatmap with matplotlib.

    Args:
        heatmap (Tensor): the heatmap tensor to display
        title (str): plot title
    """
    # if the tensor lives on the GPU, move it to the CPU first, then convert to numpy
    if heatmap.is_cuda:
        heatmap = heatmap.cpu().numpy()
    else:
        heatmap = heatmap.numpy()
    plt.imshow(heatmap, cmap='hot', interpolation='nearest')
    plt.colorbar()
    plt.title(title)
    plt.show()

def keypoints_to_heatmap(keypoints, rois, heatmap_size):
    # type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()
    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()

    lin_ind = y * heatmap_size + x
    heatmaps = lin_ind * valid

    return heatmaps, valid

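# Illustrative sketch (hypothetical values): each visible keypoint is encoded
# as a flat index y * heatmap_size + x into an H*W grid, with `valid` masking
# out-of-box or invisible points.
def _demo_keypoints_to_heatmap():
    rois = torch.tensor([[0.0, 0.0, 56.0, 56.0]])
    # one instance with two keypoints, each (x, y, visibility)
    keypoints = torch.tensor([[[14.0, 28.0, 1.0], [70.0, 28.0, 1.0]]])
    heatmaps, valid = keypoints_to_heatmap(keypoints, rois, heatmap_size=14)
    # first point maps to (x=3, y=7) -> 7 * 14 + 3 = 101; second falls outside
    # the box, so valid = 0 and its heatmap index is zeroed
    return heatmaps, valid
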
def _onnx_heatmaps_to_keypoints(
    maps, maps_i, roi_map_width, roi_map_height, widths_i, heights_i, offset_x_i, offset_y_i
):
    num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64)

    width_correction = widths_i / roi_map_width
    height_correction = heights_i / roi_map_height

    roi_map = F.interpolate(
        maps_i[:, None], size=(int(roi_map_height), int(roi_map_width)), mode="bicubic", align_corners=False
    )[:, 0]

    w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64)
    pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

    x_int = pos % w
    y_int = (pos - x_int) // w

    x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * width_correction.to(
        dtype=torch.float32
    )
    y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * height_correction.to(
        dtype=torch.float32
    )

    xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32)
    xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32)
    xy_preds_i_2 = torch.ones(xy_preds_i_1.shape, dtype=torch.float32)
    xy_preds_i = torch.stack(
        [
            xy_preds_i_0.to(dtype=torch.float32),
            xy_preds_i_1.to(dtype=torch.float32),
            xy_preds_i_2.to(dtype=torch.float32),
        ],
        0,
    )

    # TODO: simplify when indexing without rank will be supported by ONNX
    base = num_keypoints * num_keypoints + num_keypoints + 1
    ind = torch.arange(num_keypoints)
    ind = ind.to(dtype=torch.int64) * base
    end_scores_i = (
        roi_map.index_select(1, y_int.to(dtype=torch.int64))
        .index_select(2, x_int.to(dtype=torch.int64))
        .view(-1)
        .index_select(0, ind.to(dtype=torch.int64))
    )

    return xy_preds_i, end_scores_i

@torch.jit._script_if_tracing
def _onnx_heatmaps_to_keypoints_loop(
    maps, rois, widths_ceil, heights_ceil, widths, heights, offset_x, offset_y, num_keypoints
):
    xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device)

    for i in range(int(rois.size(0))):
        xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(
            maps, maps[i], widths_ceil[i], heights_ceil[i], widths[i], heights[i], offset_x[i], offset_y[i]
        )
        xy_preds = torch.cat((xy_preds.to(dtype=torch.float32), xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0)
        end_scores = torch.cat(
            (end_scores.to(dtype=torch.float32), end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0
        )
    return xy_preds, end_scores

def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    Returns a (#rois, #keypoints, 3) tensor of (x, y, visibility) predictions
    together with a (#rois, #keypoints) tensor of heatmap scores.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = widths.clamp(min=1)
    heights = heights.clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()

    num_keypoints = maps.shape[1]

    if torchvision._is_tracing():
        xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(
            maps,
            rois,
            widths_ceil,
            heights_ceil,
            widths,
            heights,
            offset_x,
            offset_y,
            torch.scalar_tensor(num_keypoints, dtype=torch.int64),
        )
        return xy_preds.permute(0, 2, 1), end_scores

    xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device)
    for i in range(len(rois)):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = F.interpolate(
            maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
        )[:, 0]
        # roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

        x_int = pos % w
        y_int = torch.div(pos - x_int, w, rounding_mode="floor")
        # assert (roi_map_probs[k, y_int, x_int] ==
        #         roi_map_probs[k, :, :].max())
        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(num_keypoints, device=roi_map.device), y_int, x_int]

    return xy_preds.permute(0, 2, 1), end_scores

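# Worked example of the Heckbert conversion used above: with a 14-wide ROI map
# stretched back over a 56-wide box starting at x = 100, the discrete argmax
# bin d = 3 maps to the continuous coordinate
#   x = 100 + (3 + 0.5) * (56 / 14) = 114.0
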
def non_maximum_suppression(a):
    # keep only local maxima: a pixel survives iff it equals the max of its
    # 3x3 neighbourhood
    ap = F.max_pool2d(a, 3, stride=1, padding=1)
    mask = (a == ap).float().clamp(min=0.0)
    return a * mask

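# Illustrative sketch: the 3x3 max-pool NMS keeps only local maxima, so a
# single peak survives while its weaker neighbour is zeroed.
def _demo_non_maximum_suppression():
    a = torch.zeros(1, 1, 5, 5)
    a[0, 0, 2, 2] = 1.0
    a[0, 0, 2, 3] = 0.5  # suppressed: its 3x3 neighbourhood max is 1.0
    out = non_maximum_suppression(a)
    return out[0, 0, 2, 2].item(), out[0, 0, 2, 3].item()  # (1.0, 0.0)
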
def heatmaps_to_lines(maps, rois):
    """Extract predicted line endpoints from heatmaps.

    For each ROI, the heatmap is resized back to the ROI resolution and the
    two strongest local maxima are taken as the line's endpoints. Returns a
    (#rois, 2, 3) tensor of (x, y, visibility) endpoint predictions and a
    (#rois, 2) tensor of heatmap scores.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous coordinate. We maintain consistency
    # with line_points_to_heatmap by using the conversion from Heckbert 1990:
    # c = d + 0.5, where d is a discrete coordinate and c is a continuous
    # coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = widths.clamp(min=1)
    heights = heights.clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()

    xy_preds = torch.zeros((len(rois), 3, 2), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((len(rois), 2), dtype=torch.float32, device=maps.device)
    for i in range(len(rois)):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = F.interpolate(
            maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
        )[:, 0]
        print(f'roi_map:{roi_map.shape}')
        w = roi_map.shape[2]
        # suppress non-peak responses, then take the two strongest peaks as
        # the line endpoints
        flatten_map = non_maximum_suppression(roi_map).reshape(1, -1)
        score, index = torch.topk(flatten_map, k=2)
        print(f'index:{index}')
        pos = index

        x_int = pos % w
        y_int = torch.div(pos - x_int, w, rounding_mode="floor")
        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(1, device=roi_map.device), y_int, x_int]

    return xy_preds.permute(0, 2, 1), end_scores

def lines_point_pair_loss(line_logits, proposals, gt_lines, line_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    N, K, H, W = line_logits.shape
    len_proposals = len(proposals)
    print(f'lines_point_pair_loss line_logits.shape:{line_logits.shape},len_proposals:{len_proposals}')
    if H != W:
        raise ValueError(
            f"line_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
        )
    discretization_size = H

    # build Gaussian target heatmaps for the matched GT lines of each image
    gs_heatmaps = []
    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_lines, line_matched_idxs):
        print(f'proposals_per_image:{proposals_per_image.shape}')
        kp = gt_kp_in_image[midx]
        gs_heatmaps_per_img = line_points_to_heatmap(kp, proposals_per_image, discretization_size)
        gs_heatmaps.append(gs_heatmaps_per_img)

    gs_heatmaps = torch.cat(gs_heatmaps, dim=0)
    print(f'gs_heatmaps:{gs_heatmaps.shape}, line_logits.shape:{line_logits.squeeze(1).shape}')

    # the targets are soft (Gaussian) heatmaps, so cross-entropy is computed
    # against per-pixel target distributions rather than hard class indices
    line_logits = line_logits.squeeze(1)
    line_loss = F.cross_entropy(line_logits, gs_heatmaps)
    return line_loss

def line_to_box(line, img_size):
    # enclose a line's two endpoints in an axis-aligned box, padded by one
    # pixel and clamped to the image bounds
    p1 = line[0, :2]
    p2 = line[1, :2]
    x_coords = torch.tensor([p1[0], p2[0]])
    y_coords = torch.tensor([p1[1], p2[1]])
    x_min = x_coords.min().clamp(min=0)
    y_min = y_coords.min().clamp(min=0)
    x_max = x_coords.max().clamp(min=0)
    y_max = y_coords.max().clamp(min=0)
    x_min = (x_min - 1).clamp(min=0)
    y_min = (y_min - 1).clamp(min=0)
    x_max = (x_max + 1).clamp(max=img_size)
    y_max = (y_max + 1).clamp(max=img_size)
    return torch.stack([x_min, y_min, x_max, y_max])

def box_iou(box1, box2):
    # box: [x1, y1, x2, y2]
    lt = torch.max(box1[:2], box2[:2])
    rb = torch.min(box1[2:], box2[2:])
    wh = (rb - lt).clamp(min=0)
    inter_area = wh[0] * wh[1]
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = area1 + area2 - inter_area
    iou = inter_area / (union_area + 1e-6)
    return iou

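# Illustrative sketch: two 10x10 boxes overlapping in a 5x5 region give
# IoU = 25 / (100 + 100 - 25) ≈ 0.1429.
def _demo_box_iou():
    box1 = torch.tensor([0.0, 0.0, 10.0, 10.0])
    box2 = torch.tensor([5.0, 5.0, 15.0, 15.0])
    return box_iou(box1, box2)  # ~0.1429
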
def line_iou_loss(x, boxes, gt_lines, matched_idx, img_size):
    losses = []
    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)
    for xx, bb, gt_line, mid in zip(x2, boxes, gt_lines, matched_idx):
        p_prob, scores = heatmaps_to_lines(xx, bb)
        pred_lines = p_prob
        gt_line_points = gt_line[mid]
        if len(pred_lines) == 0 or len(gt_line_points) == 0:
            continue

        # Hungarian matching, so predictions and GT lines are paired globally
        # instead of relying on their ordering
        cost_matrix = torch.zeros((len(pred_lines), len(gt_line_points)))
        for i, pline in enumerate(pred_lines):
            for j, gline in enumerate(gt_line_points):
                box1 = line_to_box(pline, img_size)
                box2 = line_to_box(gline, img_size)
                cost_matrix[i, j] = 1.0 - box_iou(box1, box2)

        row_ind, col_ind = linear_sum_assignment(cost_matrix.numpy())
        for r, c in zip(row_ind, col_ind):
            box1 = line_to_box(pred_lines[r], img_size)
            box2 = line_to_box(gt_line_points[c], img_size)
            iou = box_iou(box1, box2)
            losses.append(1.0 - iou)

    # fall back to a zero loss (rather than None) when nothing was matched, so
    # callers can safely add this term to the loss dict
    total_loss = torch.mean(torch.stack(losses)) if losses else x.sum() * 0
    return total_loss

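# Illustrative sketch (hypothetical cost values): linear_sum_assignment picks
# the permutation with the lowest total cost, so predictions and GT lines are
# paired globally rather than greedily.
def _demo_hungarian_matching():
    cost = torch.tensor([[0.9, 0.1],
                         [0.2, 0.8]])
    row_ind, col_ind = linear_sum_assignment(cost.numpy())
    return list(zip(row_ind, col_ind))  # [(0, 1), (1, 0)], total cost 0.3
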
def line_inference(x, boxes):
    # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    points_probs = []
    points_scores = []
    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)
    for xx, bb in zip(x2, boxes):
        p_prob, scores = heatmaps_to_lines(xx, bb)
        points_probs.append(p_prob)
        points_scores.append(scores)
    return points_probs, points_scores

def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    N, K, H, W = keypoint_logits.shape
    if H != W:
        raise ValueError(
            f"keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
        )
    discretization_size = H
    heatmaps = []
    valid = []
    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs):
        kp = gt_kp_in_image[midx]
        heatmaps_per_image, valid_per_image = keypoints_to_heatmap(kp, proposals_per_image, discretization_size)
        heatmaps.append(heatmaps_per_image.view(-1))
        valid.append(valid_per_image.view(-1))

    keypoint_targets = torch.cat(heatmaps, dim=0)
    valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
    valid = torch.where(valid)[0]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0

    keypoint_logits = keypoint_logits.view(N * K, H * W)
    keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
    return keypoint_loss

def keypointrcnn_inference(x, boxes):
    # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    kp_probs = []
    kp_scores = []
    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)
    for xx, bb in zip(x2, boxes):
        kp_prob, scores = heatmaps_to_keypoints(xx, bb)
        kp_probs.append(kp_prob)
        kp_scores.append(scores)
    return kp_probs, kp_scores

def _onnx_expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half = w_half.to(dtype=torch.float32) * scale
    h_half = h_half.to(dtype=torch.float32) * scale

    boxes_exp0 = x_c - w_half
    boxes_exp1 = y_c - h_half
    boxes_exp2 = x_c + w_half
    boxes_exp3 = y_c + h_half
    boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
    return boxes_exp

# the next two functions should be merged inside Masker
# but are kept here for the moment while we need them
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    if torchvision._is_tracing():
        return _onnx_expand_boxes(boxes, scale)
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half *= scale
    h_half *= scale

    boxes_exp = torch.zeros_like(boxes)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp

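# Illustrative sketch: scaling a box about its centre; a 10x10 box at the
# origin expanded by scale=1.2 becomes 12x12 around the same centre.
def _demo_expand_boxes():
    boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
    return expand_boxes(boxes, 1.2)  # tensor([[-1., -1., 11., 11.]])
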
@torch.jit.unused
def expand_masks_tracing_scale(M, padding):
    # type: (int, int) -> float
    return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)


def expand_masks(mask, padding):
    # type: (Tensor, int) -> Tuple[Tensor, float]
    M = mask.shape[-1]
    if torch._C._get_tracing_state():  # could not import is_tracing(), not sure why
        scale = expand_masks_tracing_scale(M, padding)
    else:
        scale = float(M + 2 * padding) / M
    padded_mask = F.pad(mask, (padding,) * 4)
    return padded_mask, scale

def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0])]
    return im_mask

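# Illustrative sketch (hypothetical sizes): a 28x28 mask is resized to its
# box's extent and pasted into a 100x100 image canvas.
def _demo_paste_mask_in_image():
    mask = torch.ones(28, 28)
    box = torch.tensor([10, 20, 40, 50], dtype=torch.int64)
    im_mask = paste_mask_in_image(mask, box, im_h=100, im_w=100)
    # the box region is fully covered, everything else stays zero
    return im_mask.shape, im_mask[20:51, 10:41].min().item()
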
def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
    one = torch.ones(1, dtype=torch.int64)
    zero = torch.zeros(1, dtype=torch.int64)

    w = box[2] - box[0] + one
    h = box[3] - box[1] + one
    w = torch.max(torch.cat((w, one)))
    h = torch.max(torch.cat((h, one)))

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, mask.size(0), mask.size(1)))

    # Resize mask
    mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
    x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
    y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
    y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))

    unpadded_im_mask = mask[(y_0 - box[1]) : (y_1 - box[1]), (x_0 - box[0]) : (x_1 - box[0])]

    # TODO : replace below with a dynamic padding when support is added in ONNX
    # pad y
    zeros_y0 = torch.zeros(y_0, unpadded_im_mask.size(1))
    zeros_y1 = torch.zeros(im_h - y_1, unpadded_im_mask.size(1))
    concat_0 = torch.cat((zeros_y0, unpadded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]
    # pad x
    zeros_x0 = torch.zeros(concat_0.size(0), x_0)
    zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
    im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]
    return im_mask

@torch.jit._script_if_tracing
def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):
    res_append = torch.zeros(0, im_h, im_w)
    for i in range(masks.size(0)):
        mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
        mask_res = mask_res.unsqueeze(0)
        res_append = torch.cat((res_append, mask_res))
    return res_append

def paste_masks_in_image(masks, boxes, img_shape, padding=1):
    # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
    masks, scale = expand_masks(masks, padding=padding)
    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
    im_h, im_w = img_shape

    if torchvision._is_tracing():
        return _onnx_paste_masks_in_image_loop(
            masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)
        )[:, None]
    res = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]
    if len(res) > 0:
        ret = torch.stack(res, dim=0)[:, None]
    else:
        ret = masks.new_empty((0, 1, im_h, im_w))
    return ret

class RoIHeads(nn.Module):
    __annotations__ = {
        "box_coder": det_utils.BoxCoder,
        "proposal_matcher": det_utils.Matcher,
        "fg_bg_sampler": det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Line
        line_roi_pool=None,
        line_head=None,
        line_predictor=None,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        # Keypoint
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
    ):
        super().__init__()

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        self.line_roi_pool = line_roi_pool
        self.line_head = line_head
        self.line_predictor = line_predictor

        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

    def has_mask(self):
        if self.mask_roi_pool is None:
            return False
        if self.mask_head is None:
            return False
        if self.mask_predictor is None:
            return False
        return True

    def has_keypoint(self):
        if self.keypoint_roi_pool is None:
            return False
        if self.keypoint_head is None:
            return False
        if self.keypoint_predictor is None:
            return False
        return True

    def has_line(self):
        if self.line_roi_pool is None:
            return False
        if self.line_head is None:
            return False
        if self.line_predictor is None:
            return False
        return True

    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        matched_idxs = []
        labels = []
        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
            if gt_boxes_in_image.numel() == 0:
                # Background image
                device = proposals_in_image.device
                clamped_matched_idxs_in_image = torch.zeros(
                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device
                )
                labels_in_image = torch.zeros((proposals_in_image.shape[0],), dtype=torch.int64, device=device)
            else:
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)
                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)

                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)

                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
                labels_in_image = labels_in_image.to(dtype=torch.int64)

                # Label background (below the low threshold)
                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD
                labels_in_image[bg_inds] = 0

                # Label ignore proposals (between low and high thresholds)
                ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS
                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler

            matched_idxs.append(clamped_matched_idxs_in_image)
            labels.append(labels_in_image)
        return matched_idxs, labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_inds = []
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):
            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]
            sampled_inds.append(img_sampled_inds)
        return sampled_inds

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        proposals = [torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes)]
        return proposals

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        if targets is None:
            raise ValueError("targets should not be None")
        if not all(["boxes" in t for t in targets]):
            raise ValueError("Every element of targets should have a boxes key")
        if not all(["labels" in t for t in targets]):
            raise ValueError("Every element of targets should have a labels key")
        if self.has_mask():
            if not all(["masks" in t for t in targets]):
                raise ValueError("Every element of targets should have a masks key")

    def select_training_samples(
        self,
        proposals,  # type: List[Tensor]
        targets,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        self.check_targets(targets)
        if targets is None:
            raise ValueError("targets should not be None")
        dtype = proposals[0].dtype
        device = proposals[0].device

        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["labels"] for t in targets]

        # append ground-truth boxes to proposals
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # get matching gt indices for each proposal
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        # sample a fixed proportion of positive-negative proposals
        sampled_inds = self.subsample(labels)
        matched_gt_boxes = []
        num_images = len(proposals)
        for img_id in range(num_images):
            img_sampled_inds = sampled_inds[img_id]
            proposals[img_id] = proposals[img_id][img_sampled_inds]
            labels[img_id] = labels[img_id][img_sampled_inds]
            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]

            gt_boxes_in_image = gt_boxes[img_id]
            if gt_boxes_in_image.numel() == 0:
                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])

        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, matched_idxs, labels, regression_targets

    def postprocess_detections(
        self,
        class_logits,  # type: Tensor
        box_regression,  # type: Tensor
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[: self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels

    def forward(
        self,
        features,  # type: Dict[str, Tensor]
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
        targets=None,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Args:
            features (Dict[str, Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        print('roihead forward')
        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                if not t["boxes"].dtype in floating_point_types:
                    raise TypeError(f"target boxes must be of float type, instead got {t['boxes'].dtype}")
                if not t["labels"].dtype == torch.int64:
                    raise TypeError(f"target labels must be of int64 type, instead got {t['labels'].dtype}")
                if self.has_keypoint():
                    if not t["keypoints"].dtype == torch.float32:
                        raise TypeError(f"target keypoints must be of float type, instead got {t['keypoints'].dtype}")

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        elif targets is not None:
            # also sample proposals at validation time so losses can be computed
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        # _, C, H, W = features['0'].shape  # ignore batch_size; only C, H, W matter
        if self.training:
            if labels is None:
                raise ValueError("labels cannot be None")
            if regression_targets is None:
                raise ValueError("regression_targets cannot be None")
            print('boxes: compute losses')
            loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
            losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
        else:
            if targets is not None:
                loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
                losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
            # NOTE: this reassigns `labels` to the per-detection predicted labels
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )
        if self.has_line():
            print('roi_heads forward: has_line()')
            line_proposals = [p["boxes"] for p in result]
            print(f'line_proposals from detections:{len(line_proposals)}')
            if self.training or targets is not None:
                # during training (and validation with targets), only focus on
                # positive boxes
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")
                num_images = len(proposals)
                print(f'num_images:{num_images}')
                line_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    line_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            print(f'line_proposals:{len(line_proposals)}')
            line_features = self.line_roi_pool(features, line_proposals, image_shapes)
            print(f'line_features from line_roi_pool:{line_features.shape}')
            line_features = self.line_head(line_features)
            print(f'line_features from line_head:{line_features.shape}')
            line_logits = self.line_predictor(line_features)
            print(f'line_logits:{line_logits.shape}')

            loss_line = {}
            loss_line_iou = {}
            img_size = 512  # assumes square 512x512 inputs for the line IoU loss
            if self.training or targets is not None:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")
                gt_lines = [t["lines"] for t in targets]
                rcnn_loss_line = lines_point_pair_loss(
                    line_logits, line_proposals, gt_lines, pos_matched_idxs
                )
                iou_loss = line_iou_loss(line_logits, line_proposals, gt_lines, pos_matched_idxs, img_size)
                loss_line = {"loss_line": rcnn_loss_line}
                loss_line_iou = {"loss_line_iou": iou_loss}
            else:
                if line_logits is None or line_proposals is None:
                    raise ValueError(
                        "both line_logits and line_proposals should not be None when not in training mode"
                    )
                line_probs, line_scores = line_inference(line_logits, line_proposals)
                for line_prob, ls, r in zip(line_probs, line_scores, result):
                    r["lines"] = line_prob
                    r["lines_scores"] = ls
            losses.update(loss_line)
            losses.update(loss_line_iou)
        if self.has_mask():
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")
                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.mask_roi_pool is not None:
                mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
                mask_features = self.mask_head(mask_features)
                mask_logits = self.mask_predictor(mask_features)
            else:
                raise Exception("Expected mask_roi_pool to be not None")

            loss_mask = {}
            if self.training:
                if targets is None or pos_matched_idxs is None or mask_logits is None:
                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")
                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)
        # keep none checks in if conditional so torchscript will conditionally
        # compile each branch
        if self.has_keypoint():
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")
                gt_keypoints = [t["keypoints"] for t in targets]
                rcnn_loss_keypoint = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs
                )
                loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
            else:
                if keypoint_logits is None or keypoint_proposals is None:
                    raise ValueError(
                        "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
                    )
                keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps
            losses.update(loss_keypoint)

        return result, losses