# roi_heads.py

from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import torch
import torch.nn.functional as F
import torchvision
from torch import nn, Tensor
from torchvision.ops import boxes as box_ops, roi_align

import libs.vision_libs.models.detection._utils as det_utils

def l2loss(input, target):
    return ((target - input) ** 2).mean(2).mean(1)


def cross_entropy_loss(logits, positive):
    nlogp = -F.log_softmax(logits, dim=0)
    return (positive * nlogp[1] + (1 - positive) * nlogp[0]).mean(2).mean(1)

def sigmoid_l1_loss(logits, target, offset=0.0, mask=None):
    logp = torch.sigmoid(logits) + offset
    loss = torch.abs(logp - target)
    if mask is not None:
        w = mask.mean(2, True).mean(1, True)
        w[w == 0] = 1
        loss = loss * (mask / w)
    return loss.mean(2).mean(1)
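
# A minimal usage sketch of sigmoid_l1_loss (not called anywhere in this
# module); the shapes below are illustrative assumptions, not values the
# surrounding pipeline actually produces.
def _demo_sigmoid_l1_loss():
    logits = torch.randn(4, 8, 8)             # (batch, H, W), as in the joff branch
    target = torch.rand(4, 8, 8) - 0.5        # offsets in [-0.5, 0.5)
    mask = (torch.rand(4, 8, 8) > 0.5).float()
    # With offset=-0.5 the sigmoid output is shifted into [-0.5, 0.5],
    # matching the target range; the mask reweights the per-pixel L1 loss.
    return sigmoid_l1_loss(logits, target, offset=-0.5, mask=mask)  # shape (4,)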

# Compute the multi-head (junction map / line map / junction offset) losses.
def line_head_loss(input_dict, outputs, feature, loss_weight, mode_train):
    # image = input_dict["image"]
    # target_b = input_dict["target_b"]
    # outputs, feature, aaa = self.backbone(image, target_b, input_dict["mode"])  # in training aaa is the loss; in val it is the boxes
    result = {"feature": feature}
    batch, channel, row, col = outputs[0].shape
    T = input_dict["target"].copy()
    n_jtyp = T["junc_map"].shape[1]

    # switch to CNHW
    for task in ["junc_map"]:
        T[task] = T[task].permute(1, 0, 2, 3)
    for task in ["junc_offset"]:
        T[task] = T[task].permute(1, 2, 0, 3, 4)

    offset = [2, 3, 5]
    losses = []
    for stack, output in enumerate(outputs):
        output = output.transpose(0, 1).reshape([-1, batch, row, col]).contiguous()
        jmap = output[0: offset[0]].reshape(n_jtyp, 2, batch, row, col)
        lmap = output[offset[0]: offset[1]].squeeze(0)
        # print(f"lmap:{lmap.shape}")
        joff = output[offset[1]: offset[2]].reshape(n_jtyp, 2, batch, row, col)
        if stack == 0:
            result["preds"] = {
                "jmap": jmap.permute(2, 0, 1, 3, 4).softmax(2)[:, :, 1],
                "lmap": lmap.sigmoid(),
                "joff": joff.permute(2, 0, 1, 3, 4).sigmoid() - 0.5,
            }
            if not mode_train:
                return result

        L = OrderedDict()
        L["jmap"] = sum(
            cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
        )
        L["lmap"] = (
            F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
            .mean(2)
            .mean(1)
        )
        L["joff"] = sum(
            sigmoid_l1_loss(joff[i, j], T["junc_offset"][i, j], -0.5, T["junc_map"][i])
            for i in range(n_jtyp)
            for j in range(2)
        )
        for loss_name in L:
            L[loss_name].mul_(loss_weight[loss_name])
        losses.append(L)

    result["losses"] = losses
    # result["aaa"] = aaa
    return result

# Compute the line vectorizer loss (and, at eval time, assemble line predictions).
def line_vectorizer_loss(result, x, ys, idx, jcs, n_batch, ps, n_out_line, n_out_junc, loss_weight, mode_train):
    if not mode_train:
        p = torch.cat(ps)
        s = torch.sigmoid(x)
        b = s > 0.5
        lines = []
        score = []
        for i in range(n_batch):
            p0 = p[idx[i]: idx[i + 1]]
            s0 = s[idx[i]: idx[i + 1]]
            mask = b[idx[i]: idx[i + 1]]
            p0 = p0[mask]
            s0 = s0[mask]
            if len(p0) == 0:
                lines.append(torch.zeros([1, n_out_line, 2, 2], device=p.device))
                score.append(torch.zeros([1, n_out_line], device=p.device))
            else:
                arg = torch.argsort(s0, descending=True)
                p0, s0 = p0[arg], s0[arg]
                lines.append(p0[None, torch.arange(n_out_line) % len(p0)])
                score.append(s0[None, torch.arange(n_out_line) % len(s0)])
            for j in range(len(jcs[i])):
                if len(jcs[i][j]) == 0:
                    jcs[i][j] = torch.zeros([n_out_junc, 2], device=p.device)
                jcs[i][j] = jcs[i][j][
                    None, torch.arange(n_out_junc) % len(jcs[i][j])
                ]
        result["preds"]["lines"] = torch.cat(lines)
        result["preds"]["score"] = torch.cat(score)
        result["preds"]["juncs"] = torch.cat([jcs[i][0] for i in range(n_batch)])
        if len(jcs[i]) > 1:
            result["preds"]["junts"] = torch.cat(
                [jcs[i][1] for i in range(n_batch)]
            )

    # if input_dict["mode"] != "testing":
    y = torch.cat(ys)
    loss = nn.BCEWithLogitsLoss(reduction="none")
    loss = loss(x, y)
    lpos_mask, lneg_mask = y, 1 - y
    loss_lpos, loss_lneg = loss * lpos_mask, loss * lneg_mask

    def sum_batch(x):
        xs = [x[idx[i]: idx[i + 1]].sum()[None] for i in range(n_batch)]
        return torch.cat(xs)

    lpos = sum_batch(loss_lpos) / sum_batch(lpos_mask).clamp(min=1)
    lneg = sum_batch(loss_lneg) / sum_batch(lneg_mask).clamp(min=1)
    result["losses"][0]["lpos"] = lpos * loss_weight["lpos"]
    result["losses"][0]["lneg"] = lneg * loss_weight["lneg"]

    if mode_train:
        del result["preds"]
    return result

def wirepoint_head_line_loss(targets, output, x, y, idx, loss_weight):
    # output, feature: results returned by the head
    # x, y, idx: intermediate results produced by the line vectorizer
    result = {}
    batch, channel, row, col = output.shape

    wires_targets = [t["wires"] for t in targets]
    wires_targets = wires_targets.copy()
    # print(f'wires_target:{wires_targets}')
    # Extract the 'junc_map', 'junc_offset' and 'line_map' tensors
    junc_maps = [d["junc_map"] for d in wires_targets]
    junc_offsets = [d["junc_offset"] for d in wires_targets]
    line_maps = [d["line_map"] for d in wires_targets]
    junc_map_tensor = torch.stack(junc_maps, dim=0)
    junc_offset_tensor = torch.stack(junc_offsets, dim=0)
    line_map_tensor = torch.stack(line_maps, dim=0)
    T = {"junc_map": junc_map_tensor, "junc_offset": junc_offset_tensor, "line_map": line_map_tensor}

    n_jtyp = T["junc_map"].shape[1]
    for task in ["junc_map"]:
        T[task] = T[task].permute(1, 0, 2, 3)
    for task in ["junc_offset"]:
        T[task] = T[task].permute(1, 2, 0, 3, 4)

    offset = [2, 3, 5]
    losses = []
    output = output.transpose(0, 1).reshape([-1, batch, row, col]).contiguous()
    jmap = output[0: offset[0]].reshape(n_jtyp, 2, batch, row, col)
    lmap = output[offset[0]: offset[1]].squeeze(0)
    joff = output[offset[1]: offset[2]].reshape(n_jtyp, 2, batch, row, col)

    L = OrderedDict()
    L["junc_map"] = sum(
        cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
    ).mean()
    L["line_map"] = (
        F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
        .mean(2)
        .mean(1)
    ).mean()
    L["junc_offset"] = sum(
        sigmoid_l1_loss(joff[i, j], T["junc_offset"][i, j], -0.5, T["junc_map"][i])
        for i in range(n_jtyp)
        for j in range(2)
    ).mean()
    for loss_name in L:
        L[loss_name].mul_(loss_weight[loss_name])
    losses.append(L)
    result["losses"] = losses

    loss = nn.BCEWithLogitsLoss(reduction="none")
    loss = loss(x, y)
    lpos_mask, lneg_mask = y, 1 - y
    loss_lpos, loss_lneg = loss * lpos_mask, loss * lneg_mask

    def sum_batch(x):
        xs = [x[idx[i]: idx[i + 1]].sum()[None] for i in range(batch)]
        return torch.cat(xs)

    lpos = sum_batch(loss_lpos) / sum_batch(lpos_mask).clamp(min=1)
    lneg = sum_batch(loss_lneg) / sum_batch(lneg_mask).clamp(min=1)
    result["losses"][0]["lpos"] = (lpos * loss_weight["lpos"]).mean()
    result["losses"][0]["lneg"] = (lneg * loss_weight["lneg"]).mean()

    return result

def wirepoint_inference(input, idx, jcs, n_batch, ps, n_out_line, n_out_junc):
    result = {}
    result["wires"] = {}
    p = torch.cat(ps)
    s = torch.sigmoid(input)
    b = s > 0.5
    lines = []
    score = []
    # print(f"n_batch:{n_batch}")
    for i in range(n_batch):
        # print(f"idx:{idx}")
        p0 = p[idx[i]: idx[i + 1]]
        s0 = s[idx[i]: idx[i + 1]]
        mask = b[idx[i]: idx[i + 1]]
        p0 = p0[mask]
        s0 = s0[mask]
        if len(p0) == 0:
            lines.append(torch.zeros([1, n_out_line, 2, 2], device=p.device))
            score.append(torch.zeros([1, n_out_line], device=p.device))
        else:
            arg = torch.argsort(s0, descending=True)
            p0, s0 = p0[arg], s0[arg]
            lines.append(p0[None, torch.arange(n_out_line) % len(p0)])
            score.append(s0[None, torch.arange(n_out_line) % len(s0)])
        for j in range(len(jcs[i])):
            if len(jcs[i][j]) == 0:
                jcs[i][j] = torch.zeros([n_out_junc, 2], device=p.device)
            jcs[i][j] = jcs[i][j][
                None, torch.arange(n_out_junc) % len(jcs[i][j])
            ]
    result["wires"]["lines"] = torch.cat(lines)
    result["wires"]["score"] = torch.cat(score)
    result["wires"]["juncs"] = torch.cat([jcs[i][0] for i in range(n_batch)])
    if len(jcs[i]) > 1:
        # was result["preds"]["junts"], which would raise a KeyError here
        result["wires"]["junts"] = torch.cat(
            [jcs[i][1] for i in range(n_batch)]
        )
    # print(f"predict result: {result}")
    return result

def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Args:
        class_logits (Tensor)
        box_regression (Tensor)
        labels (list[BoxList])
        regression_targets (Tensor)

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.where(labels > 0)[0]
    labels_pos = labels[sampled_pos_inds_subset]
    N, num_classes = class_logits.shape
    box_regression = box_regression.reshape(N, box_regression.size(-1) // 4, 4)

    box_loss = F.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        beta=1 / 9,
        reduction="sum",
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
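
# A minimal usage sketch of fastrcnn_loss; the proposal counts, class count
# and label values below are illustrative assumptions only.
def _demo_fastrcnn_loss():
    num_classes = 5
    class_logits = torch.randn(8, num_classes)        # 8 sampled proposals in total
    box_regression = torch.randn(8, num_classes * 4)  # per-class box deltas
    labels = [torch.tensor([0, 2, 1, 0]), torch.tensor([3, 0, 4, 1])]  # per image
    regression_targets = [torch.randn(4, 4), torch.randn(4, 4)]
    return fastrcnn_loss(class_logits, box_regression, labels, regression_targets)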

def maskrcnn_inference(x, labels):
    # type: (Tensor, List[Tensor]) -> List[Tensor]
    """
    From the results of the CNN, post process the masks
    by taking the mask corresponding to the class with max
    probability (which are of fixed size and directly output
    by the CNN) and return the masks in the mask field of the BoxList.

    Args:
        x (Tensor): the mask logits
        labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image

    Returns:
        results (list[BoxList]): one BoxList for each image, containing
            the extra field mask
    """
    mask_prob = x.sigmoid()

    # select masks corresponding to the predicted classes
    num_masks = x.shape[0]
    boxes_per_image = [label.shape[0] for label in labels]
    labels = torch.cat(labels)
    index = torch.arange(num_masks, device=labels.device)
    mask_prob = mask_prob[index, labels][:, None]
    mask_prob = mask_prob.split(boxes_per_image, dim=0)
    return mask_prob
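
# A minimal usage sketch of maskrcnn_inference; 28x28 logits for 2 + 1
# detections over 5 classes are assumed purely for illustration.
def _demo_maskrcnn_inference():
    x = torch.randn(3, 5, 28, 28)                       # (num_detections, num_classes, M, M)
    labels = [torch.tensor([1, 4]), torch.tensor([2])]  # predicted class per detection
    probs = maskrcnn_inference(x, labels)
    # probs is a per-image list; each entry has shape (num_boxes, 1, 28, 28)
    return probs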

def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
    # type: (Tensor, Tensor, Tensor, int) -> Tensor
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.
    """
    matched_idxs = matched_idxs.to(boxes)
    rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
    gt_masks = gt_masks[:, None].to(rois)
    return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0]
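
# A minimal usage sketch of project_masks_on_boxes; the ground-truth mask
# sizes and box coordinates are illustrative assumptions.
def _demo_project_masks_on_boxes():
    gt_masks = (torch.rand(2, 32, 32) > 0.5).float()   # 2 GT instance masks
    boxes = torch.tensor([[4.0, 4.0, 20.0, 20.0],
                          [8.0, 2.0, 30.0, 28.0],
                          [0.0, 0.0, 16.0, 16.0]])     # 3 proposals
    matched_idxs = torch.tensor([0, 1, 0])             # GT index matched to each proposal
    # Returns one 14x14 target mask per proposal, cropped from its matched GT.
    return project_masks_on_boxes(gt_masks, boxes, matched_idxs, M=14)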

def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    """
    Args:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    discretization_size = mask_logits.shape[-1]
    labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
    mask_targets = [
        project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
    ]

    labels = torch.cat(labels, dim=0)
    mask_targets = torch.cat(mask_targets, dim=0)

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    mask_loss = F.binary_cross_entropy_with_logits(
        mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets
    )
    return mask_loss

def keypoints_to_heatmap(keypoints, rois, heatmap_size):
    # type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()
    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()

    lin_ind = y * heatmap_size + x
    heatmaps = lin_ind * valid

    return heatmaps, valid
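
# A minimal usage sketch of keypoints_to_heatmap; one ROI with two keypoints,
# coordinates chosen purely for illustration.
def _demo_keypoints_to_heatmap():
    rois = torch.tensor([[0.0, 0.0, 56.0, 56.0]])
    # (x, y, visibility) triples; the second keypoint is marked invisible
    keypoints = torch.tensor([[[14.0, 28.0, 1.0], [40.0, 8.0, 0.0]]])
    heatmaps, valid = keypoints_to_heatmap(keypoints, rois, heatmap_size=56)
    # heatmaps holds flattened 56x56 indices; valid flags visible, in-bounds points
    return heatmaps, valid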

def _onnx_heatmaps_to_keypoints(
    maps, maps_i, roi_map_width, roi_map_height, widths_i, heights_i, offset_x_i, offset_y_i
):
    num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64)

    width_correction = widths_i / roi_map_width
    height_correction = heights_i / roi_map_height

    roi_map = F.interpolate(
        maps_i[:, None], size=(int(roi_map_height), int(roi_map_width)), mode="bicubic", align_corners=False
    )[:, 0]

    w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64)
    pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

    x_int = pos % w
    y_int = (pos - x_int) // w

    x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * width_correction.to(
        dtype=torch.float32
    )
    y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * height_correction.to(
        dtype=torch.float32
    )

    xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32)
    xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32)
    xy_preds_i_2 = torch.ones(xy_preds_i_1.shape, dtype=torch.float32)
    xy_preds_i = torch.stack(
        [
            xy_preds_i_0.to(dtype=torch.float32),
            xy_preds_i_1.to(dtype=torch.float32),
            xy_preds_i_2.to(dtype=torch.float32),
        ],
        0,
    )

    # TODO: simplify when indexing without rank will be supported by ONNX
    base = num_keypoints * num_keypoints + num_keypoints + 1
    ind = torch.arange(num_keypoints)
    ind = ind.to(dtype=torch.int64) * base
    end_scores_i = (
        roi_map.index_select(1, y_int.to(dtype=torch.int64))
        .index_select(2, x_int.to(dtype=torch.int64))
        .view(-1)
        .index_select(0, ind.to(dtype=torch.int64))
    )

    return xy_preds_i, end_scores_i


@torch.jit._script_if_tracing
def _onnx_heatmaps_to_keypoints_loop(
    maps, rois, widths_ceil, heights_ceil, widths, heights, offset_x, offset_y, num_keypoints
):
    xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device)

    for i in range(int(rois.size(0))):
        xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(
            maps, maps[i], widths_ceil[i], heights_ceil[i], widths[i], heights[i], offset_x[i], offset_y[i]
        )
        xy_preds = torch.cat((xy_preds.to(dtype=torch.float32), xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0)
        end_scores = torch.cat(
            (end_scores.to(dtype=torch.float32), end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0
        )
    return xy_preds, end_scores

def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    Returns a tensor of shape (#rois, #keypoints, 3) holding (x, y, 1) per
    keypoint, together with per-keypoint scores taken from the heatmap maxima.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = widths.clamp(min=1)
    heights = heights.clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()

    num_keypoints = maps.shape[1]

    if torchvision._is_tracing():
        xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(
            maps,
            rois,
            widths_ceil,
            heights_ceil,
            widths,
            heights,
            offset_x,
            offset_y,
            torch.scalar_tensor(num_keypoints, dtype=torch.int64),
        )
        return xy_preds.permute(0, 2, 1), end_scores

    xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device)
    for i in range(len(rois)):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = F.interpolate(
            maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
        )[:, 0]
        # roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

        x_int = pos % w
        y_int = torch.div(pos - x_int, w, rounding_mode="floor")
        # assert (roi_map_probs[k, y_int, x_int] ==
        #         roi_map_probs[k, :, :].max())
        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(num_keypoints, device=roi_map.device), y_int, x_int]

    return xy_preds.permute(0, 2, 1), end_scores
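
# A minimal usage sketch of heatmaps_to_keypoints; heatmap size and ROI
# coordinates are illustrative assumptions.
def _demo_heatmaps_to_keypoints():
    maps = torch.randn(2, 17, 56, 56)   # 2 ROIs, 17 keypoints (COCO-style), 56x56 heatmaps
    rois = torch.tensor([[10.0, 10.0, 60.0, 90.0],
                         [0.0, 0.0, 40.0, 40.0]])
    xy_preds, scores = heatmaps_to_keypoints(maps, rois)
    # xy_preds: (2, 17, 3) image-space (x, y, 1); scores: (2, 17) heatmap maxima
    return xy_preds, scores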

def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    N, K, H, W = keypoint_logits.shape
    if H != W:
        raise ValueError(
            f"keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
        )
    discretization_size = H
    heatmaps = []
    valid = []
    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs):
        kp = gt_kp_in_image[midx]
        heatmaps_per_image, valid_per_image = keypoints_to_heatmap(kp, proposals_per_image, discretization_size)
        heatmaps.append(heatmaps_per_image.view(-1))
        valid.append(valid_per_image.view(-1))

    keypoint_targets = torch.cat(heatmaps, dim=0)
    valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
    valid = torch.where(valid)[0]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0

    keypoint_logits = keypoint_logits.view(N * K, H * W)

    keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
    return keypoint_loss

def keypointrcnn_inference(x, boxes):
    # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    kp_probs = []
    kp_scores = []

    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)

    for xx, bb in zip(x2, boxes):
        kp_prob, scores = heatmaps_to_keypoints(xx, bb)
        kp_probs.append(kp_prob)
        kp_scores.append(scores)

    return kp_probs, kp_scores
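
# A minimal usage sketch of keypointrcnn_inference; 3 detections split
# 2 + 1 across two images, all shapes assumed for illustration.
def _demo_keypointrcnn_inference():
    x = torch.randn(3, 17, 56, 56)      # keypoint heatmaps for all detections
    boxes = [torch.tensor([[0.0, 0.0, 30.0, 40.0], [5.0, 5.0, 25.0, 35.0]]),
             torch.tensor([[2.0, 2.0, 50.0, 60.0]])]
    kp_probs, kp_scores = keypointrcnn_inference(x, boxes)
    # kp_probs[i]: (num_boxes_i, 17, 3); kp_scores[i]: (num_boxes_i, 17)
    return kp_probs, kp_scores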

def _onnx_expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half = w_half.to(dtype=torch.float32) * scale
    h_half = h_half.to(dtype=torch.float32) * scale

    boxes_exp0 = x_c - w_half
    boxes_exp1 = y_c - h_half
    boxes_exp2 = x_c + w_half
    boxes_exp3 = y_c + h_half
    boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
    return boxes_exp


# the next two functions should be merged inside Masker
# but are kept here for the moment while we need them
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    if torchvision._is_tracing():
        return _onnx_expand_boxes(boxes, scale)
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half *= scale
    h_half *= scale

    boxes_exp = torch.zeros_like(boxes)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp
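
# A minimal usage sketch of expand_boxes: scaling a box about its center;
# the coordinates are illustrative assumptions.
def _demo_expand_boxes():
    boxes = torch.tensor([[10.0, 10.0, 20.0, 20.0]])
    # scale=1.2 grows the 10x10 box to 12x12 around its center (15, 15)
    return expand_boxes(boxes, scale=1.2)   # tensor([[9., 9., 21., 21.]])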

@torch.jit.unused
def expand_masks_tracing_scale(M, padding):
    # type: (int, int) -> float
    return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)


def expand_masks(mask, padding):
    # type: (Tensor, int) -> Tuple[Tensor, float]
    M = mask.shape[-1]
    if torch._C._get_tracing_state():  # could not import is_tracing(), not sure why
        scale = expand_masks_tracing_scale(M, padding)
    else:
        scale = float(M + 2 * padding) / M
    padded_mask = F.pad(mask, (padding,) * 4)
    return padded_mask, scale

def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]
    return im_mask
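
# A minimal usage sketch of paste_mask_in_image; the box must be an integer
# tensor (as produced by paste_masks_in_image), values assumed for illustration.
def _demo_paste_mask_in_image():
    mask = torch.rand(28, 28)                                # one 28x28 mask
    box = torch.tensor([4, 6, 20, 24], dtype=torch.int64)    # (x0, y0, x1, y1)
    im_mask = paste_mask_in_image(mask, box, im_h=64, im_w=64)
    # im_mask is 64x64 with the resized mask pasted into the box region
    return im_mask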

def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
    one = torch.ones(1, dtype=torch.int64)
    zero = torch.zeros(1, dtype=torch.int64)

    w = box[2] - box[0] + one
    h = box[3] - box[1] + one
    w = torch.max(torch.cat((w, one)))
    h = torch.max(torch.cat((h, one)))

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, mask.size(0), mask.size(1)))

    # Resize mask
    mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
    x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
    y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
    y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))

    unpaded_im_mask = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]

    # TODO : replace below with a dynamic padding when support is added in ONNX
    # pad y
    zeros_y0 = torch.zeros(y_0, unpaded_im_mask.size(1))
    zeros_y1 = torch.zeros(im_h - y_1, unpaded_im_mask.size(1))
    concat_0 = torch.cat((zeros_y0, unpaded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]
    # pad x
    zeros_x0 = torch.zeros(concat_0.size(0), x_0)
    zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
    im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]
    return im_mask


@torch.jit._script_if_tracing
def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):
    res_append = torch.zeros(0, im_h, im_w)
    for i in range(masks.size(0)):
        mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
        mask_res = mask_res.unsqueeze(0)
        res_append = torch.cat((res_append, mask_res))
    return res_append

def paste_masks_in_image(masks, boxes, img_shape, padding=1):
    # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
    masks, scale = expand_masks(masks, padding=padding)
    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
    im_h, im_w = img_shape

    if torchvision._is_tracing():
        return _onnx_paste_masks_in_image_loop(
            masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)
        )[:, None]
    res = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]
    if len(res) > 0:
        ret = torch.stack(res, dim=0)[:, None]
    else:
        ret = masks.new_empty((0, 1, im_h, im_w))
    return ret
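
# A minimal usage sketch of paste_masks_in_image; two 28x28 mask probabilities
# pasted into a 100x120 image, all values assumed for illustration.
def _demo_paste_masks_in_image():
    masks = torch.rand(2, 1, 28, 28)
    boxes = torch.tensor([[10.0, 10.0, 50.0, 60.0],
                          [30.0, 20.0, 90.0, 80.0]])
    pasted = paste_masks_in_image(masks, boxes, img_shape=(100, 120))
    # pasted: (2, 1, 100, 120), one full-image mask per box
    return pasted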

class RoIHeads(nn.Module):
    __annotations__ = {
        "box_coder": det_utils.BoxCoder,
        "proposal_matcher": det_utils.Matcher,
        "fg_bg_sampler": det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        line_head,
        line_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
    ):
        super().__init__()

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        self.line_head = line_head
        self.line_predictor = line_predictor

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

    def has_line(self):
        # if self.mask_roi_pool is None:
        #     return False
        if self.line_head is None:
            return False
        if self.line_predictor is None:
            return False
        return True

    def has_mask(self):
        if self.mask_roi_pool is None:
            return False
        if self.mask_head is None:
            return False
        if self.mask_predictor is None:
            return False
        return True

    def has_keypoint(self):
        if self.keypoint_roi_pool is None:
            return False
        if self.keypoint_head is None:
            return False
        if self.keypoint_predictor is None:
            return False
        return True

    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        matched_idxs = []
        labels = []
        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
            if gt_boxes_in_image.numel() == 0:
                # Background image
                device = proposals_in_image.device
                clamped_matched_idxs_in_image = torch.zeros(
                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device
                )
                labels_in_image = torch.zeros((proposals_in_image.shape[0],), dtype=torch.int64, device=device)
            else:
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)
                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)

                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)

                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
                labels_in_image = labels_in_image.to(dtype=torch.int64)

                # Label background (below the low threshold)
                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD
                labels_in_image[bg_inds] = 0

                # Label ignore proposals (between low and high thresholds)
                ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS
                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler

            matched_idxs.append(clamped_matched_idxs_in_image)
            labels.append(labels_in_image)
        return matched_idxs, labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_inds = []
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):
            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]
            sampled_inds.append(img_sampled_inds)
        return sampled_inds

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        proposals = [torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes)]
        return proposals

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        if targets is None:
            raise ValueError("targets should not be None")
        if not all(["boxes" in t for t in targets]):
            raise ValueError("Every element of targets should have a boxes key")
        if not all(["labels" in t for t in targets]):
            raise ValueError("Every element of targets should have a labels key")
        if self.has_mask():
            if not all(["masks" in t for t in targets]):
                raise ValueError("Every element of targets should have a masks key")

    def select_training_samples(
        self,
        proposals,  # type: List[Tensor]
        targets,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        self.check_targets(targets)
        if targets is None:
            raise ValueError("targets should not be None")
        dtype = proposals[0].dtype
        device = proposals[0].device

        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["labels"] for t in targets]

        # append ground-truth bboxes to proposals
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # get matching gt indices for each proposal
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        # sample a fixed proportion of positive-negative proposals
        sampled_inds = self.subsample(labels)
        matched_gt_boxes = []
        num_images = len(proposals)
        for img_id in range(num_images):
            img_sampled_inds = sampled_inds[img_id]
            proposals[img_id] = proposals[img_id][img_sampled_inds]
            labels[img_id] = labels[img_id][img_sampled_inds]
            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]

            gt_boxes_in_image = gt_boxes[img_id]
            if gt_boxes_in_image.numel() == 0:
                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])

        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, matched_idxs, labels, regression_targets

    def postprocess_detections(
        self,
        class_logits,  # type: Tensor
        box_regression,  # type: Tensor
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[: self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels

    def forward(
        self,
        features,  # type: Dict[str, Tensor]
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
        targets=None,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Args:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        # treat the presence of targets as the training signal
        self.training = targets is not None

        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                if not t["boxes"].dtype in floating_point_types:
                    raise TypeError(f"target boxes must be of float type, instead got {t['boxes'].dtype}")
                if not t["labels"].dtype == torch.int64:
                    raise TypeError(f"target labels must be of int64 type, instead got {t['labels'].dtype}")
                if self.has_keypoint():
                    if not t["keypoints"].dtype == torch.float32:
                        raise TypeError(f"target keypoints must be of float type, instead got {t['keypoints'].dtype}")

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        if self.training:
            if labels is None:
                raise ValueError("labels cannot be None")
            if regression_targets is None:
                raise ValueError("regression_targets cannot be None")
            loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
            losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )

        features_lcnn = features['0']
        if self.has_line():
            outputs = self.line_head(features_lcnn)
            loss_weight = {'junc_map': 8.0, 'line_map': 0.5, 'junc_offset': 0.25, 'lpos': 1, 'lneg': 1}
            x, y, idx, jcs, n_batch, ps, n_out_line, n_out_junc = self.line_predictor(
                inputs=outputs, features=features_lcnn, targets=targets)

            # # line_loss (multitasklearner)
            # if self.training:
            #     head_result = line_head_loss(targets, outputs, features_lcnn, loss_weight, mode_train=True)
            #     line_result = line_vectorizer_loss(head_result, x, ys, idx, jcs, n_batch, ps, n_out_line, n_out_junc,
            #                                        loss_weight, mode_train=True)
            # else:
            #     head_result = line_head_loss(targets, outputs, features_lcnn, loss_weight, mode_train=False)
            #     line_result = line_vectorizer_loss(head_result, x, ys, idx, jcs, n_batch, ps, n_out_line, n_out_junc,
            #                                        loss_weight, mode_train=False)

            if self.training:
                rcnn_loss_wirepoint = wirepoint_head_line_loss(targets, outputs, x, y, idx, loss_weight)
                loss_wirepoint = {"loss_wirepoint": rcnn_loss_wirepoint}
            else:
                pred = wirepoint_inference(x, idx, jcs, n_batch, ps, n_out_line, n_out_junc)
                result.append(pred)
                loss_wirepoint = {}
            losses.update(loss_wirepoint)

        if self.has_mask():
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.mask_roi_pool is not None:
                mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
                mask_features = self.mask_head(mask_features)
                mask_logits = self.mask_predictor(mask_features)
            else:
                raise Exception("Expected mask_roi_pool to be not None")

            loss_mask = {}
            if self.training:
                if targets is None or pos_matched_idxs is None or mask_logits is None:
                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")

                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        # keep none checks in if conditional so torchscript will conditionally
        # compile each branch
        if (
            self.keypoint_roi_pool is not None
            and self.keypoint_head is not None
            and self.keypoint_predictor is not None
        ):
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")

                gt_keypoints = [t["keypoints"] for t in targets]
                rcnn_loss_keypoint = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs
                )
                loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
            else:
                if keypoint_logits is None or keypoint_proposals is None:
                    raise ValueError(
                        "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
                    )

                keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps
            losses.update(loss_keypoint)

        return result, losses