import time
from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import torch
import torch.nn.functional as F
import torchvision
from torch import nn, Tensor
from torchvision.ops import boxes as box_ops, roi_align

import libs.vision_libs.models.detection._utils as det_utils
from models.wirenet.postprocess import postprocess


def l2loss(input, target):
    return ((target - input) ** 2).mean(2).mean(1)


def cross_entropy_loss(logits, positive):
    nlogp = -F.log_softmax(logits, dim=0)
    return (positive * nlogp[1] + (1 - positive) * nlogp[0]).mean(2).mean(1)


def sigmoid_l1_loss(logits, target, offset=0.0, mask=None):
    logp = torch.sigmoid(logits) + offset
    loss = torch.abs(logp - target)
    if mask is not None:
        w = mask.mean(2, True).mean(1, True)
        w[w == 0] = 1
        loss = loss * (mask / w)
    return loss.mean(2).mean(1)
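
# Usage sketch (illustrative only; the [N, H, W] shapes mirror how the heads
# below call this helper and are not part of this module's API):
#
#     logits = torch.randn(8, 128, 128)               # per-pixel offset logits
#     target = torch.rand(8, 128, 128) - 0.5          # offsets in [-0.5, 0.5]
#     mask = (torch.rand(8, 128, 128) > 0.5).float()
#     loss = sigmoid_l1_loss(logits, target, offset=-0.5, mask=mask)  # shape [8]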


# Compute the multi-head (junction map / line map / junction offset) losses.
def line_head_loss(input_dict, outputs, feature, loss_weight, mode_train):
    result = {"feature": feature}
    batch, channel, row, col = outputs[0].shape
    T = input_dict["target"].copy()
    n_jtyp = T["junc_map"].shape[1]

    # switch to CNHW
    for task in ["junc_map"]:
        T[task] = T[task].permute(1, 0, 2, 3)
    for task in ["junc_offset"]:
        T[task] = T[task].permute(1, 2, 0, 3, 4)

    offset = [2, 3, 5]
    losses = []
    for stack, output in enumerate(outputs):
        output = output.transpose(0, 1).reshape([-1, batch, row, col]).contiguous()
        jmap = output[0: offset[0]].reshape(n_jtyp, 2, batch, row, col)
        lmap = output[offset[0]: offset[1]].squeeze(0)
        joff = output[offset[1]: offset[2]].reshape(n_jtyp, 2, batch, row, col)
        if stack == 0:
            result["preds"] = {
                "jmap": jmap.permute(2, 0, 1, 3, 4).softmax(2)[:, :, 1],
                "lmap": lmap.sigmoid(),
                "joff": joff.permute(2, 0, 1, 3, 4).sigmoid() - 0.5,
            }
            if not mode_train:
                return result

        L = OrderedDict()
        L["jmap"] = sum(
            cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
        )
        L["lmap"] = (
            F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
            .mean(2)
            .mean(1)
        )
        L["joff"] = sum(
            sigmoid_l1_loss(joff[i, j], T["junc_offset"][i, j], -0.5, T["junc_map"][i])
            for i in range(n_jtyp)
            for j in range(2)
        )
        for loss_name in L:
            L[loss_name].mul_(loss_weight[loss_name])
        losses.append(L)

    result["losses"] = losses
    return result


# Compute the line-vectorizer loss.
def line_vectorizer_loss(result, x, ys, idx, jcs, n_batch, ps, n_out_line, n_out_junc, loss_weight, mode_train):
    if not mode_train:
        p = torch.cat(ps)
        s = torch.sigmoid(x)
        b = s > 0.5
        lines = []
        score = []
        for i in range(n_batch):
            p0 = p[idx[i]: idx[i + 1]]
            s0 = s[idx[i]: idx[i + 1]]
            mask = b[idx[i]: idx[i + 1]]
            p0 = p0[mask]
            s0 = s0[mask]
            if len(p0) == 0:
                lines.append(torch.zeros([1, n_out_line, 2, 2], device=p.device))
                score.append(torch.zeros([1, n_out_line], device=p.device))
            else:
                arg = torch.argsort(s0, descending=True)
                p0, s0 = p0[arg], s0[arg]
                lines.append(p0[None, torch.arange(n_out_line) % len(p0)])
                score.append(s0[None, torch.arange(n_out_line) % len(s0)])
            for j in range(len(jcs[i])):
                if len(jcs[i][j]) == 0:
                    jcs[i][j] = torch.zeros([n_out_junc, 2], device=p.device)
                jcs[i][j] = jcs[i][j][
                    None, torch.arange(n_out_junc) % len(jcs[i][j])
                ]
        result["preds"]["lines"] = torch.cat(lines)
        result["preds"]["score"] = torch.cat(score)
        result["preds"]["juncs"] = torch.cat([jcs[i][0] for i in range(n_batch)])
        if len(jcs[i]) > 1:
            result["preds"]["junts"] = torch.cat(
                [jcs[i][1] for i in range(n_batch)]
            )

    y = torch.cat(ys)
    loss = nn.BCEWithLogitsLoss(reduction="none")
    loss = loss(x, y)
    lpos_mask, lneg_mask = y, 1 - y
    loss_lpos, loss_lneg = loss * lpos_mask, loss * lneg_mask

    def sum_batch(x):
        xs = [x[idx[i]: idx[i + 1]].sum()[None] for i in range(n_batch)]
        return torch.cat(xs)

    lpos = sum_batch(loss_lpos) / sum_batch(lpos_mask).clamp(min=1)
    lneg = sum_batch(loss_lneg) / sum_batch(lneg_mask).clamp(min=1)
    result["losses"][0]["lpos"] = lpos * loss_weight["lpos"]
    result["losses"][0]["lneg"] = lneg * loss_weight["lneg"]

    if mode_train:
        del result["preds"]
    return result


def wirepoint_head_line_loss(targets, output, x, y, idx, loss_weight):
    # output: result of the wirepoint head; x, y, idx: intermediate results
    # of the line vectorizer.
    result = {}
    batch, channel, row, col = output.shape
    wires_targets = [t["wires"] for t in targets]
    wires_targets = wires_targets.copy()

    # stack the per-image 'junc_map', 'junc_offset' and 'line_map' tensors
    junc_maps = [d["junc_map"] for d in wires_targets]
    junc_offsets = [d["junc_offset"] for d in wires_targets]
    line_maps = [d["line_map"] for d in wires_targets]
    junc_map_tensor = torch.stack(junc_maps, dim=0)
    junc_offset_tensor = torch.stack(junc_offsets, dim=0)
    line_map_tensor = torch.stack(line_maps, dim=0)
    T = {"junc_map": junc_map_tensor, "junc_offset": junc_offset_tensor, "line_map": line_map_tensor}
    n_jtyp = T["junc_map"].shape[1]

    # switch to CNHW
    for task in ["junc_map"]:
        T[task] = T[task].permute(1, 0, 2, 3)
    for task in ["junc_offset"]:
        T[task] = T[task].permute(1, 2, 0, 3, 4)

    offset = [2, 3, 5]
    losses = []
    output = output.transpose(0, 1).reshape([-1, batch, row, col]).contiguous()
    jmap = output[0: offset[0]].reshape(n_jtyp, 2, batch, row, col)
    lmap = output[offset[0]: offset[1]].squeeze(0)
    joff = output[offset[1]: offset[2]].reshape(n_jtyp, 2, batch, row, col)

    L = OrderedDict()
    L["junc_map"] = sum(
        cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
    ).mean()
    L["line_map"] = (
        F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
        .mean(2)
        .mean(1)
    ).mean()
    L["junc_offset"] = sum(
        sigmoid_l1_loss(joff[i, j], T["junc_offset"][i, j], -0.5, T["junc_map"][i])
        for i in range(n_jtyp)
        for j in range(2)
    ).mean()
    for loss_name in L:
        L[loss_name].mul_(loss_weight[loss_name])
    losses.append(L)
    result["losses"] = losses

    loss = nn.BCEWithLogitsLoss(reduction="none")
    loss = loss(x, y)
    lpos_mask, lneg_mask = y, 1 - y
    loss_lpos, loss_lneg = loss * lpos_mask, loss * lneg_mask

    def sum_batch(x):
        xs = [x[idx[i]: idx[i + 1]].sum()[None] for i in range(batch)]
        return torch.cat(xs)

    lpos = sum_batch(loss_lpos) / sum_batch(lpos_mask).clamp(min=1)
    lneg = sum_batch(loss_lneg) / sum_batch(lneg_mask).clamp(min=1)
    result["losses"][0]["lpos"] = (lpos * loss_weight["lpos"]).mean()
    result["losses"][0]["lneg"] = (lneg * loss_weight["lneg"]).mean()
    return result
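
# Usage sketch (mirrors the call in RoIHeads.forward below; the tensors here
# are placeholders, not real model outputs):
#
#     loss_weight = {'junc_map': 8.0, 'line_map': 0.5, 'junc_offset': 0.25,
#                    'lpos': 1, 'lneg': 1}
#     loss_dict = wirepoint_head_line_loss(targets, head_output, x, y, idx, loss_weight)
#     total = sum(loss_dict["losses"][0].values())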


def wirepoint_inference(input, idx, jcs, n_batch, ps, n_out_line, n_out_junc):
    result = {"wires": {}}
    p = torch.cat(ps)
    s = torch.sigmoid(input)
    b = s > 0.5
    lines = []
    score = []
    for i in range(n_batch):
        p0 = p[idx[i]: idx[i + 1]]
        s0 = s[idx[i]: idx[i + 1]]
        mask = b[idx[i]: idx[i + 1]]
        p0 = p0[mask]
        s0 = s0[mask]
        if len(p0) == 0:
            lines.append(torch.zeros([1, n_out_line, 2, 2], device=p.device))
            score.append(torch.zeros([1, n_out_line], device=p.device))
        else:
            arg = torch.argsort(s0, descending=True)
            p0, s0 = p0[arg], s0[arg]
            lines.append(p0[None, torch.arange(n_out_line) % len(p0)])
            score.append(s0[None, torch.arange(n_out_line) % len(s0)])
        for j in range(len(jcs[i])):
            if len(jcs[i][j]) == 0:
                jcs[i][j] = torch.zeros([n_out_junc, 2], device=p.device)
            jcs[i][j] = jcs[i][j][
                None, torch.arange(n_out_junc) % len(jcs[i][j])
            ]
    result["wires"]["lines"] = torch.cat(lines)
    result["wires"]["score"] = torch.cat(score)
    result["wires"]["juncs"] = torch.cat([jcs[i][0] for i in range(n_batch)])
    if len(jcs[i]) > 1:
        result["wires"]["junts"] = torch.cat(
            [jcs[i][1] for i in range(n_batch)]
        )
    return result
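
# Shape of the returned dict (a sketch; n_out_line / n_out_junc are the
# per-image caps passed in by the line predictor):
#
#     {"wires": {"lines": [n_batch, n_out_line, 2, 2],
#                "score": [n_batch, n_out_line],
#                "juncs": [n_batch, n_out_junc, 2]}}   # plus "junts" if present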


def wirepoint_line_target_loss(input, idx, n_batch, ps, n_out_line, targets):
    p = torch.cat(ps)
    s = torch.sigmoid(input)
    b = s > 0.5
    lines = []
    score = []
    for i in range(n_batch):
        p0 = p[idx[i]: idx[i + 1]]
        s0 = s[idx[i]: idx[i + 1]]
        mask = b[idx[i]: idx[i + 1]]
        p0 = p0[mask]
        s0 = s0[mask]
        if len(p0) == 0:
            lines.append(torch.zeros([1, n_out_line, 2, 2], device=p.device))
            score.append(torch.zeros([1, n_out_line], device=p.device))
        else:
            arg = torch.argsort(s0, descending=True)
            p0, s0 = p0[arg], s0[arg]
            lines.append(p0[None, torch.arange(n_out_line) % len(p0)])
            score.append(s0[None, torch.arange(n_out_line) % len(s0)])

    # difference between the number of post-processed lines and the number of
    # ground-truth lines, averaged over the batch
    line_num_loss = []
    line_maps = [sum(t["wires"]["lpre_label"]) for t in targets]
    start = time.time()
    diag = (512 ** 2 + 512 ** 2) ** 0.5
    line1 = torch.cat(lines).detach().cpu().numpy()
    score1 = torch.cat(score).detach().cpu().numpy()
    for i in range(line1.shape[0]):
        line_tmp = line1[i] / 128 * 512
        score_tmp = score1[i]
        lines2, _ = postprocess(line_tmp, score_tmp, diag * 0.01, 0, False)
        line_num_loss.append(len(lines2) - line_maps[i])
    end = time.time()
    print(end - start)
    return sum(line_num_loss) / len(line_num_loss)


def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Args:
        class_logits (Tensor)
        box_regression (Tensor)
        labels (list[BoxList])
        regression_targets (Tensor)

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.where(labels > 0)[0]
    labels_pos = labels[sampled_pos_inds_subset]
    N, num_classes = class_logits.shape
    box_regression = box_regression.reshape(N, box_regression.size(-1) // 4, 4)

    box_loss = F.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        beta=1 / 9,
        reduction="sum",
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
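
# Sketch of a call (dummy shapes; 2 images, 4 sampled proposals each, 3 classes):
#
#     class_logits = torch.randn(8, 3)
#     box_regression = torch.randn(8, 3 * 4)
#     labels = [torch.tensor([0, 1, 2, 0]), torch.tensor([1, 0, 0, 2])]
#     regression_targets = [torch.randn(4, 4), torch.randn(4, 4)]
#     cls_loss, box_loss = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)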


def maskrcnn_inference(x, labels):
    # type: (Tensor, List[Tensor]) -> List[Tensor]
    """
    From the results of the CNN, post process the masks
    by taking the mask corresponding to the class with max
    probability (which are of fixed size and directly output
    by the CNN) and return the masks in the mask field of the BoxList.

    Args:
        x (Tensor): the mask logits
        labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image

    Returns:
        results (list[BoxList]): one BoxList for each image, containing
            the extra field mask
    """
    mask_prob = x.sigmoid()

    # select masks corresponding to the predicted classes
    num_masks = x.shape[0]
    boxes_per_image = [label.shape[0] for label in labels]
    labels = torch.cat(labels)
    index = torch.arange(num_masks, device=labels.device)
    mask_prob = mask_prob[index, labels][:, None]
    mask_prob = mask_prob.split(boxes_per_image, dim=0)

    return mask_prob
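
# Shape sketch: with 5 masks over 2 images (3 + 2 boxes) and 28x28 logits,
#
#     x = torch.randn(5, 3, 28, 28)                  # [num_masks, num_classes, M, M]
#     labels = [torch.tensor([1, 2, 1]), torch.tensor([2, 1])]
#     probs = maskrcnn_inference(x, labels)          # [[3, 1, 28, 28], [2, 1, 28, 28]]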


def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
    # type: (Tensor, Tensor, Tensor, int) -> Tensor
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.
    """
    matched_idxs = matched_idxs.to(boxes)
    rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
    gt_masks = gt_masks[:, None].to(rois)
    return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0]


def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    """
    Args:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """

    discretization_size = mask_logits.shape[-1]
    labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
    mask_targets = [
        project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
    ]

    labels = torch.cat(labels, dim=0)
    mask_targets = torch.cat(mask_targets, dim=0)

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    mask_loss = F.binary_cross_entropy_with_logits(
        mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets
    )
    return mask_loss


def keypoints_to_heatmap(keypoints, rois, heatmap_size):
    # type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()
    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()

    lin_ind = y * heatmap_size + x
    heatmaps = lin_ind * valid

    return heatmaps, valid
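
# Coordinate-mapping sketch: a visible keypoint at the center of a 100x100 ROI
# lands in the center cell of a 56x56 heatmap (values chosen for illustration):
#
#     rois = torch.tensor([[0.0, 0.0, 100.0, 100.0]])
#     keypoints = torch.tensor([[[50.0, 50.0, 1.0]]])   # (x, y, visibility)
#     heatmaps, valid = keypoints_to_heatmap(keypoints, rois, 56)
#     # heatmaps == 28 * 56 + 28 (linear index of cell (28, 28)), valid == 1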


def _onnx_heatmaps_to_keypoints(
    maps, maps_i, roi_map_width, roi_map_height, widths_i, heights_i, offset_x_i, offset_y_i
):
    num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64)

    width_correction = widths_i / roi_map_width
    height_correction = heights_i / roi_map_height

    roi_map = F.interpolate(
        maps_i[:, None], size=(int(roi_map_height), int(roi_map_width)), mode="bicubic", align_corners=False
    )[:, 0]

    w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64)
    pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

    x_int = pos % w
    y_int = (pos - x_int) // w

    x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * width_correction.to(
        dtype=torch.float32
    )
    y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * height_correction.to(
        dtype=torch.float32
    )

    xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32)
    xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32)
    xy_preds_i_2 = torch.ones(xy_preds_i_1.shape, dtype=torch.float32)
    xy_preds_i = torch.stack(
        [
            xy_preds_i_0.to(dtype=torch.float32),
            xy_preds_i_1.to(dtype=torch.float32),
            xy_preds_i_2.to(dtype=torch.float32),
        ],
        0,
    )

    # TODO: simplify when indexing without rank will be supported by ONNX
    base = num_keypoints * num_keypoints + num_keypoints + 1
    ind = torch.arange(num_keypoints)
    ind = ind.to(dtype=torch.int64) * base
    end_scores_i = (
        roi_map.index_select(1, y_int.to(dtype=torch.int64))
        .index_select(2, x_int.to(dtype=torch.int64))
        .view(-1)
        .index_select(0, ind.to(dtype=torch.int64))
    )

    return xy_preds_i, end_scores_i


@torch.jit._script_if_tracing
def _onnx_heatmaps_to_keypoints_loop(
    maps, rois, widths_ceil, heights_ceil, widths, heights, offset_x, offset_y, num_keypoints
):
    xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device)

    for i in range(int(rois.size(0))):
        xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(
            maps, maps[i], widths_ceil[i], heights_ceil[i], widths[i], heights[i], offset_x[i], offset_y[i]
        )
        xy_preds = torch.cat((xy_preds.to(dtype=torch.float32), xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0)
        end_scores = torch.cat(
            (end_scores.to(dtype=torch.float32), end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0
        )
    return xy_preds, end_scores


def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps. Returns xy_preds of
    shape (#rois, #keypoints, 3), where the last dimension holds (x, y, 1) for
    each keypoint, together with end_scores of shape (#rois, #keypoints)
    holding the heatmap logit at each predicted location.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = widths.clamp(min=1)
    heights = heights.clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()

    num_keypoints = maps.shape[1]

    if torchvision._is_tracing():
        xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(
            maps,
            rois,
            widths_ceil,
            heights_ceil,
            widths,
            heights,
            offset_x,
            offset_y,
            torch.scalar_tensor(num_keypoints, dtype=torch.int64),
        )
        return xy_preds.permute(0, 2, 1), end_scores

    xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device)
    for i in range(len(rois)):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = F.interpolate(
            maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
        )[:, 0]
        # roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

        x_int = pos % w
        y_int = torch.div(pos - x_int, w, rounding_mode="floor")
        # assert (roi_map_probs[k, y_int, x_int] ==
        #         roi_map_probs[k, :, :].max())
        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(num_keypoints, device=roi_map.device), y_int, x_int]

    return xy_preds.permute(0, 2, 1), end_scores
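
# Round-trip sketch with a synthetic one-peak heatmap (shapes only; the exact
# argmax after bicubic upsampling may land a cell off):
#
#     maps = torch.zeros(1, 17, 56, 56)     # [num_rois, num_keypoints, H, W]
#     maps[0, :, 28, 14] = 10.0             # peak at (x=14, y=28)
#     rois = torch.tensor([[0.0, 0.0, 112.0, 112.0]])
#     xy, scores = heatmaps_to_keypoints(maps, rois)
#     # xy.shape == (1, 17, 3); x ≈ 28.5, y ≈ 56.5 after rescaling to the ROI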


def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    N, K, H, W = keypoint_logits.shape
    if H != W:
        raise ValueError(
            f"keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
        )
    discretization_size = H
    heatmaps = []
    valid = []
    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs):
        kp = gt_kp_in_image[midx]
        heatmaps_per_image, valid_per_image = keypoints_to_heatmap(kp, proposals_per_image, discretization_size)
        heatmaps.append(heatmaps_per_image.view(-1))
        valid.append(valid_per_image.view(-1))

    keypoint_targets = torch.cat(heatmaps, dim=0)
    valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
    valid = torch.where(valid)[0]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0

    keypoint_logits = keypoint_logits.view(N * K, H * W)

    keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
    return keypoint_loss


def keypointrcnn_inference(x, boxes):
    # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    kp_probs = []
    kp_scores = []

    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)

    for xx, bb in zip(x2, boxes):
        kp_prob, scores = heatmaps_to_keypoints(xx, bb)
        kp_probs.append(kp_prob)
        kp_scores.append(scores)

    return kp_probs, kp_scores


def _onnx_expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half = w_half.to(dtype=torch.float32) * scale
    h_half = h_half.to(dtype=torch.float32) * scale

    boxes_exp0 = x_c - w_half
    boxes_exp1 = y_c - h_half
    boxes_exp2 = x_c + w_half
    boxes_exp3 = y_c + h_half
    boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
    return boxes_exp


# the next two functions should be merged inside Masker
# but are kept here for the moment while we need them
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    if torchvision._is_tracing():
        return _onnx_expand_boxes(boxes, scale)
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half *= scale
    h_half *= scale

    boxes_exp = torch.zeros_like(boxes)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp
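
# Worked example: scaling a 10x10 box about its center by 1.5 extends each
# half-side from 5 to 7.5:
#
#     boxes = torch.tensor([[10.0, 10.0, 20.0, 20.0]])
#     expand_boxes(boxes, 1.5)   # tensor([[ 7.5,  7.5, 22.5, 22.5]])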


@torch.jit.unused
def expand_masks_tracing_scale(M, padding):
    # type: (int, int) -> float
    return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)


def expand_masks(mask, padding):
    # type: (Tensor, int) -> Tuple[Tensor, float]
    M = mask.shape[-1]
    if torch._C._get_tracing_state():  # could not import is_tracing(), not sure why
        scale = expand_masks_tracing_scale(M, padding)
    else:
        scale = float(M + 2 * padding) / M
    padded_mask = F.pad(mask, (padding,) * 4)
    return padded_mask, scale


def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]
    return im_mask
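
# Minimal sketch: pasting a 2x2 mask into a 6x6 image at box (1, 1, 3, 3); the
# box is int64 here because paste_masks_in_image casts boxes before calling
# this helper:
#
#     mask = torch.ones(2, 2)
#     box = torch.tensor([1, 1, 3, 3], dtype=torch.int64)
#     out = paste_mask_in_image(mask, box, 6, 6)   # nonzero on rows/cols 1..3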


def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
    one = torch.ones(1, dtype=torch.int64)
    zero = torch.zeros(1, dtype=torch.int64)

    w = box[2] - box[0] + one
    h = box[3] - box[1] + one
    w = torch.max(torch.cat((w, one)))
    h = torch.max(torch.cat((h, one)))

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, mask.size(0), mask.size(1)))

    # Resize mask
    mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
    x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
    y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
    y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))

    unpaded_im_mask = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]

    # TODO : replace below with a dynamic padding when support is added in ONNX
    # pad y
    zeros_y0 = torch.zeros(y_0, unpaded_im_mask.size(1))
    zeros_y1 = torch.zeros(im_h - y_1, unpaded_im_mask.size(1))
    concat_0 = torch.cat((zeros_y0, unpaded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]
    # pad x
    zeros_x0 = torch.zeros(concat_0.size(0), x_0)
    zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
    im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]
    return im_mask


@torch.jit._script_if_tracing
def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):
    res_append = torch.zeros(0, im_h, im_w)
    for i in range(masks.size(0)):
        mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
        mask_res = mask_res.unsqueeze(0)
        res_append = torch.cat((res_append, mask_res))
    return res_append


def paste_masks_in_image(masks, boxes, img_shape, padding=1):
    # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
    masks, scale = expand_masks(masks, padding=padding)
    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
    im_h, im_w = img_shape

    if torchvision._is_tracing():
        return _onnx_paste_masks_in_image_loop(
            masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)
        )[:, None]
    res = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]
    if len(res) > 0:
        ret = torch.stack(res, dim=0)[:, None]
    else:
        ret = masks.new_empty((0, 1, im_h, im_w))
    return ret
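
# End-to-end sketch: projecting two 28x28 mask probabilities back into a
# 480x640 image (dummy tensors; in practice these come from maskrcnn_inference):
#
#     masks = torch.rand(2, 1, 28, 28)
#     boxes = torch.tensor([[30.0, 40.0, 130.0, 140.0], [200.0, 50.0, 260.0, 110.0]])
#     full = paste_masks_in_image(masks, boxes, (480, 640))   # [2, 1, 480, 640]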


class RoIHeads(nn.Module):
    __annotations__ = {
        "box_coder": det_utils.BoxCoder,
        "proposal_matcher": det_utils.Matcher,
        "fg_bg_sampler": det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        line_head,
        line_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
    ):
        super().__init__()

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        self.line_head = line_head
        self.line_predictor = line_predictor

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

    def has_line(self):
        if self.line_head is None:
            return False
        if self.line_predictor is None:
            return False
        return True

    def has_mask(self):
        if self.mask_roi_pool is None:
            return False
        if self.mask_head is None:
            return False
        if self.mask_predictor is None:
            return False
        return True

    def has_keypoint(self):
        if self.keypoint_roi_pool is None:
            return False
        if self.keypoint_head is None:
            return False
        if self.keypoint_predictor is None:
            return False
        return True

    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        matched_idxs = []
        labels = []
        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
            if gt_boxes_in_image.numel() == 0:
                # Background image
                device = proposals_in_image.device
                clamped_matched_idxs_in_image = torch.zeros(
                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device
                )
                labels_in_image = torch.zeros((proposals_in_image.shape[0],), dtype=torch.int64, device=device)
            else:
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)
                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)

                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)

                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
                labels_in_image = labels_in_image.to(dtype=torch.int64)

                # Label background (below the low threshold)
                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD
                labels_in_image[bg_inds] = 0

                # Label ignore proposals (between low and high thresholds)
                ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS
                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler

            matched_idxs.append(clamped_matched_idxs_in_image)
            labels.append(labels_in_image)
        return matched_idxs, labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_inds = []
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):
            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]
            sampled_inds.append(img_sampled_inds)
        return sampled_inds

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        proposals = [torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes)]
        return proposals

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        if targets is None:
            raise ValueError("targets should not be None")
        if not all(["boxes" in t for t in targets]):
            raise ValueError("Every element of targets should have a boxes key")
        if not all(["labels" in t for t in targets]):
            raise ValueError("Every element of targets should have a labels key")
        if self.has_mask():
            if not all(["masks" in t for t in targets]):
                raise ValueError("Every element of targets should have a masks key")

    def select_training_samples(
        self,
        proposals,  # type: List[Tensor]
        targets,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        self.check_targets(targets)
        if targets is None:
            raise ValueError("targets should not be None")
        dtype = proposals[0].dtype
        device = proposals[0].device

        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["labels"] for t in targets]

        # append ground-truth bboxes to proposals
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # get matching gt indices for each proposal
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        # sample a fixed proportion of positive-negative proposals
        sampled_inds = self.subsample(labels)
        matched_gt_boxes = []
        num_images = len(proposals)
        for img_id in range(num_images):
            img_sampled_inds = sampled_inds[img_id]
            proposals[img_id] = proposals[img_id][img_sampled_inds]
            labels[img_id] = labels[img_id][img_sampled_inds]
            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]

            gt_boxes_in_image = gt_boxes[img_id]
            if gt_boxes_in_image.numel() == 0:
                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])

        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, matched_idxs, labels, regression_targets

    def postprocess_detections(
        self,
        class_logits,  # type: Tensor
        box_regression,  # type: Tensor
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[: self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels

    def forward(
        self,
        features,  # type: Dict[str, Tensor]
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
        targets=None,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Args:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        # train/eval mode is inferred from whether targets are provided
        self.training = targets is not None

        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                if t["boxes"].dtype not in floating_point_types:
                    raise TypeError(f"target boxes must be of float type, instead got {t['boxes'].dtype}")
                if t["labels"].dtype != torch.int64:
                    raise TypeError(f"target labels must be of int64 type, instead got {t['labels'].dtype}")
                if self.has_keypoint():
                    if t["keypoints"].dtype != torch.float32:
                        raise TypeError(f"target keypoints must be of float type, instead got {t['keypoints'].dtype}")

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        if self.training:
            if labels is None:
                raise ValueError("labels cannot be None")
            if regression_targets is None:
                raise ValueError("regression_targets cannot be None")
            loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
            losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )

        # the wireframe (LCNN-style) head consumes the first ('0') feature map
        features_lcnn = features['0']
        if self.has_line():
            outputs = self.line_head(features_lcnn)
            loss_weight = {'junc_map': 8.0, 'line_map': 0.5, 'junc_offset': 0.25, 'lpos': 1, 'lneg': 1}
            x, y, idx, jcs, n_batch, ps, n_out_line, n_out_junc = self.line_predictor(
                inputs=outputs, features=features_lcnn, targets=targets)

            # alternative multi-task-learner loss path, kept for reference:
            # if self.training:
            #     head_result = line_head_loss(targets, outputs, features_lcnn, loss_weight, mode_train=True)
            #     line_result = line_vectorizer_loss(head_result, x, ys, idx, jcs, n_batch, ps, n_out_line,
            #                                        n_out_junc, loss_weight, mode_train=True)
            # else:
            #     head_result = line_head_loss(targets, outputs, features_lcnn, loss_weight, mode_train=False)
            #     line_result = line_vectorizer_loss(head_result, x, ys, idx, jcs, n_batch, ps, n_out_line,
            #                                        n_out_junc, loss_weight, mode_train=False)

            if self.training:
                rcnn_loss_wirepoint = wirepoint_head_line_loss(targets, outputs, x, y, idx, loss_weight)
                # line-count loss (difference in number of lines), currently disabled:
                # line_target_loss = wirepoint_line_target_loss(x, idx, n_batch, ps, n_out_line, targets)
                loss_wirepoint = {"loss_wirepoint": rcnn_loss_wirepoint}
                # loss_wirepoint["loss_wirepoint"]["line_target_loss"] = line_target_loss
            else:
                pred = wirepoint_inference(x, idx, jcs, n_batch, ps, n_out_line, n_out_junc)
                result.append(pred)
                loss_wirepoint = {}
            losses.update(loss_wirepoint)

        if self.has_mask():
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.mask_roi_pool is not None:
                mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
                mask_features = self.mask_head(mask_features)
                mask_logits = self.mask_predictor(mask_features)
            else:
                raise Exception("Expected mask_roi_pool to be not None")

            loss_mask = {}
            if self.training:
                if targets is None or pos_matched_idxs is None or mask_logits is None:
                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")

                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        # keep none checks in if conditional so torchscript will conditionally
        # compile each branch
        if (
            self.keypoint_roi_pool is not None
            and self.keypoint_head is not None
            and self.keypoint_predictor is not None
        ):
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
            keypoint_features = self.keypoint_head(keypoint_features)
            keypoint_logits = self.keypoint_predictor(keypoint_features)

            loss_keypoint = {}
            if self.training:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")

                gt_keypoints = [t["keypoints"] for t in targets]
                rcnn_loss_keypoint = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs
                )
                loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
            else:
                if keypoint_logits is None or keypoint_proposals is None:
                    raise ValueError(
                        "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
                    )

                keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps
            losses.update(loss_keypoint)

        return result, losses
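

# Usage sketch (hypothetical shapes and modules; box_roi_pool, box_head, etc.
# are stand-ins for the real builders used elsewhere in this repo):
#
#     roi_heads = RoIHeads(
#         box_roi_pool, box_head, box_predictor,
#         line_head, line_predictor,
#         fg_iou_thresh=0.5, bg_iou_thresh=0.5,
#         batch_size_per_image=512, positive_fraction=0.25,
#         bbox_reg_weights=None,
#         score_thresh=0.05, nms_thresh=0.5, detections_per_img=100,
#     )
#     detections, losses = roi_heads(features, proposals, image_shapes, targets)
#     # training (targets given): losses populated, detections empty
#     # inference (targets=None): detections populated, losses empty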