from collections import OrderedDict
from typing import Dict, List, Optional, Tuple

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import torchvision
from torch import nn, Tensor
from torchvision.ops import boxes as box_ops, roi_align

from . import _utils as det_utils
from torch.utils.data.dataloader import default_collate


def l2loss(input, target):
    return ((target - input) ** 2).mean(2).mean(1)


def cross_entropy_loss(logits, positive):
    nlogp = -F.log_softmax(logits, dim=0)
    return (positive * nlogp[1] + (1 - positive) * nlogp[0]).mean(2).mean(1)


def sigmoid_l1_loss(logits, target, offset=0.0, mask=None):
    logp = torch.sigmoid(logits) + offset
    loss = torch.abs(logp - target)
    if mask is not None:
        w = mask.mean(2, True).mean(1, True)
        w[w == 0] = 1
        loss = loss * (mask / w)
    return loss.mean(2).mean(1)
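
# A minimal usage sketch for the three helpers above (shapes are assumed for
# illustration): each helper reduces over the two spatial dims only, so
# [N, H, W] inputs come back as per-image losses of shape [N].
#
#     logits = torch.randn(4, 128, 128)
#     target = torch.rand(4, 128, 128)
#     mask = (target > 0.5).float()
#     l2 = l2loss(torch.sigmoid(logits), target)                    # shape [4]
#     l1 = sigmoid_l1_loss(logits, target, offset=-0.5, mask=mask)  # shape [4]
#     ce = cross_entropy_loss(torch.randn(2, 4, 128, 128), mask)    # shape [4]
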
# def wirepoint_loss(target, outputs, feature, loss_weight, mode):
#     wires = target['wires']
#     result = {"feature": feature}
#     batch, channel, row, col = outputs[0].shape
#     print(f"Initial Output[0] shape: {outputs[0].shape}")  # print the initial output shape
#     print(f"Total Stacks: {len(outputs)}")  # print the number of stacks
#
#     T = wires.copy()
#     n_jtyp = T["junc_map"].shape[1]
#     for task in ["junc_map"]:
#         T[task] = T[task].permute(1, 0, 2, 3)
#     for task in ["junc_offset"]:
#         T[task] = T[task].permute(1, 2, 0, 3, 4)
#
#     offset = self.head_off
#     loss_weight = loss_weight
#     losses = []
#
#     for stack, output in enumerate(outputs):
#         output = output.transpose(0, 1).reshape([-1, batch, row, col]).contiguous()
#         print(f"Stack {stack} output shape: {output.shape}")  # print the output shape of each stack
#         jmap = output[0: offset[0]].reshape(n_jtyp, 2, batch, row, col)
#         lmap = output[offset[0]: offset[1]].squeeze(0)
#         joff = output[offset[1]: offset[2]].reshape(n_jtyp, 2, batch, row, col)
#
#         if stack == 0:
#             result["preds"] = {
#                 "jmap": jmap.permute(2, 0, 1, 3, 4).softmax(2)[:, :, 1],
#                 "lmap": lmap.sigmoid(),
#                 "joff": joff.permute(2, 0, 1, 3, 4).sigmoid() - 0.5,
#             }
#             # visualize_feature_map(jmap[0, 0], title=f"jmap - Stack {stack}")
#             # visualize_feature_map(lmap, title=f"lmap - Stack {stack}")
#             # visualize_feature_map(joff[0, 0], title=f"joff - Stack {stack}")
#
#             if mode == "testing":
#                 return result
#
#         L = OrderedDict()
#         L["junc_map"] = sum(
#             cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
#         )
#         L["line_map"] = (
#             F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
#             .mean(2)
#             .mean(1)
#         )
#         L["junc_offset"] = sum(
#             sigmoid_l1_loss(joff[i, j], T["junc_offset"][i, j], -0.5, T["junc_map"][i])
#             for i in range(n_jtyp)
#             for j in range(2)
#         )
#         for loss_name in L:
#             L[loss_name].mul_(loss_weight[loss_name])
#         losses.append(L)
#
#     result["losses"] = losses
#     return result

def wirepoint_head_line_loss(targets, output, x, y, idx, loss_weight):
    # output, feature: results returned by the head
    # x, y, idx: intermediate results produced by the line sampler
    result = {}
    batch, channel, row, col = output.shape
    wires_targets = [t["wires"] for t in targets]
    # print(f'wires_target:{wires_targets}')

    # gather the 'junc_map', 'junc_offset' and 'line_map' tensors
    junc_maps = [d["junc_map"] for d in wires_targets]
    junc_offsets = [d["junc_offset"] for d in wires_targets]
    line_maps = [d["line_map"] for d in wires_targets]

    junc_map_tensor = torch.stack(junc_maps, dim=0)
    junc_offset_tensor = torch.stack(junc_offsets, dim=0)
    line_map_tensor = torch.stack(line_maps, dim=0)
    T = {"junc_map": junc_map_tensor, "junc_offset": junc_offset_tensor, "line_map": line_map_tensor}

    n_jtyp = T["junc_map"].shape[1]
    for task in ["junc_map"]:
        T[task] = T[task].permute(1, 0, 2, 3)
    for task in ["junc_offset"]:
        T[task] = T[task].permute(1, 2, 0, 3, 4)

    offset = [2, 3, 5]
    losses = []
    output = output.transpose(0, 1).reshape([-1, batch, row, col]).contiguous()
    jmap = output[0: offset[0]].reshape(n_jtyp, 2, batch, row, col)
    lmap = output[offset[0]: offset[1]].squeeze(0)
    joff = output[offset[1]: offset[2]].reshape(n_jtyp, 2, batch, row, col)

    L = OrderedDict()
    L["junc_map"] = sum(
        cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
    )
    L["line_map"] = (
        F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
        .mean(2)
        .mean(1)
    )
    L["junc_offset"] = sum(
        sigmoid_l1_loss(joff[i, j], T["junc_offset"][i, j], -0.5, T["junc_map"][i])
        for i in range(n_jtyp)
        for j in range(2)
    )
    for loss_name in L:
        L[loss_name].mul_(loss_weight[loss_name])
    losses.append(L)
    result["losses"] = losses

    loss = nn.BCEWithLogitsLoss(reduction="none")
    loss = loss(x, y)
    lpos_mask, lneg_mask = y, 1 - y
    loss_lpos, loss_lneg = loss * lpos_mask, loss * lneg_mask

    def sum_batch(x):
        xs = [x[idx[i]: idx[i + 1]].sum()[None] for i in range(batch)]
        return torch.cat(xs)

    lpos = sum_batch(loss_lpos) / sum_batch(lpos_mask).clamp(min=1)
    lneg = sum_batch(loss_lneg) / sum_batch(lneg_mask).clamp(min=1)
    result["losses"][0]["lpos"] = lpos * loss_weight["lpos"]
    result["losses"][0]["lneg"] = lneg * loss_weight["lneg"]
    return result
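
# Hedged note: `loss_weight` must carry the five keys consumed above. The
# values below only mirror the dict used later in RoIHeads.forward; they are
# a sketch, not canonical settings.
#
#     loss_weight = {"junc_map": 8.0, "line_map": 0.5, "junc_offset": 0.25,
#                    "lpos": 1, "lneg": 1}
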
def wirepoint_inference(input, idx, jcs, n_batch, ps, n_out_line, n_out_junc):
    result = {}
    result["wires"] = {}
    p = torch.cat(ps)
    s = torch.sigmoid(input)
    b = s > 0.5
    lines = []
    score = []
    # print(f"n_batch:{n_batch}")
    for i in range(n_batch):
        # print(f"idx:{idx}")
        p0 = p[idx[i]: idx[i + 1]]
        s0 = s[idx[i]: idx[i + 1]]
        mask = b[idx[i]: idx[i + 1]]
        p0 = p0[mask]
        s0 = s0[mask]
        if len(p0) == 0:
            lines.append(torch.zeros([1, n_out_line, 2, 2], device=p.device))
            score.append(torch.zeros([1, n_out_line], device=p.device))
        else:
            arg = torch.argsort(s0, descending=True)
            p0, s0 = p0[arg], s0[arg]
            lines.append(p0[None, torch.arange(n_out_line) % len(p0)])
            score.append(s0[None, torch.arange(n_out_line) % len(s0)])
        for j in range(len(jcs[i])):
            if len(jcs[i][j]) == 0:
                jcs[i][j] = torch.zeros([n_out_junc, 2], device=p.device)
            jcs[i][j] = jcs[i][j][
                None, torch.arange(n_out_junc) % len(jcs[i][j])
            ]
    result["wires"]["lines"] = torch.cat(lines)
    result["wires"]["score"] = torch.cat(score)
    result["wires"]["juncs"] = torch.cat([jcs[i][0] for i in range(n_batch)])
    if len(jcs[i]) > 1:
        # "junts" belongs under the same "wires" dict; the original wrote to a
        # non-existent result["preds"], which raised a KeyError
        result["wires"]["junts"] = torch.cat(
            [jcs[i][1] for i in range(n_batch)]
        )
    return result
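
# Shape note (inferred from the code above, stated as a sketch): the returned
# dict is
#     {"wires": {"lines": [n_batch, n_out_line, 2, 2],
#                "score": [n_batch, n_out_line],
#                "juncs": [n_batch, n_out_junc, 2],
#                "junts": ...}}  # "junts" only when a second junction type exists
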
def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Computes the loss for Faster R-CNN.

    Args:
        class_logits (Tensor)
        box_regression (Tensor)
        labels (list[BoxList])
        regression_targets (Tensor)

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    classification_loss = F.cross_entropy(class_logits, labels)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = torch.where(labels > 0)[0]
    labels_pos = labels[sampled_pos_inds_subset]
    N, num_classes = class_logits.shape
    box_regression = box_regression.reshape(N, box_regression.size(-1) // 4, 4)

    box_loss = F.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        beta=1 / 9,
        reduction="sum",
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
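
# Minimal sketch (dummy shapes assumed): two images, 3 classes, 4 sampled
# proposals each; `box_regression` holds one 4-vector per class.
#
#     class_logits = torch.randn(8, 3)
#     box_regression = torch.randn(8, 3 * 4)
#     labels = [torch.tensor([0, 1, 2, 0]), torch.tensor([1, 0, 0, 2])]
#     regression_targets = [torch.randn(4, 4), torch.randn(4, 4)]
#     cls_loss, box_loss = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
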
def maskrcnn_inference(x, labels):
    # type: (Tensor, List[Tensor]) -> List[Tensor]
    """
    From the results of the CNN, post process the masks
    by taking the mask corresponding to the class with max
    probability (which are of fixed size and directly output
    by the CNN) and return the masks in the mask field of the BoxList.

    Args:
        x (Tensor): the mask logits
        labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image

    Returns:
        results (list[BoxList]): one BoxList for each image, containing
            the extra field mask
    """
    mask_prob = x.sigmoid()

    # select masks corresponding to the predicted classes
    num_masks = x.shape[0]
    boxes_per_image = [label.shape[0] for label in labels]
    labels = torch.cat(labels)
    index = torch.arange(num_masks, device=labels.device)
    mask_prob = mask_prob[index, labels][:, None]
    mask_prob = mask_prob.split(boxes_per_image, dim=0)

    return mask_prob
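
# Sketch (assumed shapes): 5 detections over 3 classes, split 3 + 2 across
# two images; each image gets back a [num_boxes, 1, 28, 28] probability map.
#
#     x = torch.randn(5, 3, 28, 28)
#     labels = [torch.tensor([1, 2, 0]), torch.tensor([2, 1])]
#     probs = maskrcnn_inference(x, labels)  # [3, 1, 28, 28] and [2, 1, 28, 28]
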
def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
    # type: (Tensor, Tensor, Tensor, int) -> Tensor
    """
    Given segmentation masks and the bounding boxes corresponding
    to the location of the masks in the image, this function
    crops and resizes the masks in the position defined by the
    boxes. This prepares the masks for them to be fed to the
    loss computation as the targets.
    """
    matched_idxs = matched_idxs.to(boxes)
    rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
    gt_masks = gt_masks[:, None].to(rois)
    return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0]
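
# Sketch (assumed inputs): three GT masks, two sampled boxes matched back to
# them, projected onto the 28x28 grid used by the mask head.
#
#     gt_masks = torch.rand(3, 32, 32)
#     boxes = torch.tensor([[0.0, 0.0, 16.0, 16.0], [8.0, 8.0, 31.0, 31.0]])
#     matched_idxs = torch.tensor([0, 2])
#     targets = project_masks_on_boxes(gt_masks, boxes, matched_idxs, M=28)  # [2, 28, 28]
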
def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    """
    Args:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    discretization_size = mask_logits.shape[-1]
    # print(f'mask_logits:{mask_logits},gt_masks:{gt_masks},gt_labels:{gt_labels}')
    # print(f'mask discretization_size:{discretization_size}')
    labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
    # print(f'mask labels:{labels}')
    mask_targets = [
        project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
    ]

    labels = torch.cat(labels, dim=0)
    mask_targets = torch.cat(mask_targets, dim=0)

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0
    # print(f'mask_targets:{mask_targets.shape},mask_logits:{mask_logits.shape}')

    mask_loss = F.binary_cross_entropy_with_logits(
        mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets
    )
    # print(f'mask_loss:{mask_loss}')
    return mask_loss
def keypoints_to_heatmap(keypoints, rois, heatmap_size):
    # type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
    scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])

    offset_x = offset_x[:, None]
    offset_y = offset_y[:, None]
    scale_x = scale_x[:, None]
    scale_y = scale_y[:, None]

    x = keypoints[..., 0]
    y = keypoints[..., 1]

    x_boundary_inds = x == rois[:, 2][:, None]
    y_boundary_inds = y == rois[:, 3][:, None]

    x = (x - offset_x) * scale_x
    x = x.floor().long()
    y = (y - offset_y) * scale_y
    y = y.floor().long()
    x[x_boundary_inds] = heatmap_size - 1
    y[y_boundary_inds] = heatmap_size - 1

    valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
    vis = keypoints[..., 2] > 0
    valid = (valid_loc & vis).long()

    lin_ind = y * heatmap_size + x
    heatmaps = lin_ind * valid

    return heatmaps, valid
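
# Sketch (assumed inputs): two ROIs with two (x, y, visibility) keypoints
# each; the call returns flattened heatmap indices plus a validity mask,
# both of shape [2, 2].
#
#     rois = torch.tensor([[0.0, 0.0, 56.0, 56.0], [10.0, 10.0, 40.0, 40.0]])
#     keypoints = torch.tensor([[[14.0, 14.0, 1.0], [28.0, 28.0, 1.0]],
#                               [[20.0, 20.0, 1.0], [12.0, 12.0, 0.0]]])
#     heatmaps, valid = keypoints_to_heatmap(keypoints, rois, heatmap_size=56)
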
def _onnx_heatmaps_to_keypoints(
    maps, maps_i, roi_map_width, roi_map_height, widths_i, heights_i, offset_x_i, offset_y_i
):
    num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64)

    width_correction = widths_i / roi_map_width
    height_correction = heights_i / roi_map_height

    roi_map = F.interpolate(
        maps_i[:, None], size=(int(roi_map_height), int(roi_map_width)), mode="bicubic", align_corners=False
    )[:, 0]

    w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64)
    pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

    x_int = pos % w
    y_int = (pos - x_int) // w

    x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * width_correction.to(
        dtype=torch.float32
    )
    y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * height_correction.to(
        dtype=torch.float32
    )

    xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32)
    xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32)
    xy_preds_i_2 = torch.ones(xy_preds_i_1.shape, dtype=torch.float32)
    xy_preds_i = torch.stack(
        [
            xy_preds_i_0.to(dtype=torch.float32),
            xy_preds_i_1.to(dtype=torch.float32),
            xy_preds_i_2.to(dtype=torch.float32),
        ],
        0,
    )

    # TODO: simplify when indexing without rank will be supported by ONNX
    base = num_keypoints * num_keypoints + num_keypoints + 1
    ind = torch.arange(num_keypoints)
    ind = ind.to(dtype=torch.int64) * base
    end_scores_i = (
        roi_map.index_select(1, y_int.to(dtype=torch.int64))
        .index_select(2, x_int.to(dtype=torch.int64))
        .view(-1)
        .index_select(0, ind.to(dtype=torch.int64))
    )

    return xy_preds_i, end_scores_i
@torch.jit._script_if_tracing
def _onnx_heatmaps_to_keypoints_loop(
    maps, rois, widths_ceil, heights_ceil, widths, heights, offset_x, offset_y, num_keypoints
):
    xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device)

    for i in range(int(rois.size(0))):
        xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(
            maps, maps[i], widths_ceil[i], heights_ceil[i], widths[i], heights[i], offset_x[i], offset_y[i]
        )
        xy_preds = torch.cat((xy_preds.to(dtype=torch.float32), xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0)
        end_scores = torch.cat(
            (end_scores.to(dtype=torch.float32), end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0
        )
    return xy_preds, end_scores
def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    Returns a tensor of shape (#rois, #keypoints, 3) holding (x, y, 1) per
    keypoint, plus a (#rois, #keypoints) tensor of heatmap scores.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = widths.clamp(min=1)
    heights = heights.clamp(min=1)
    widths_ceil = widths.ceil()
    heights_ceil = heights.ceil()

    num_keypoints = maps.shape[1]

    if torchvision._is_tracing():
        xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(
            maps,
            rois,
            widths_ceil,
            heights_ceil,
            widths,
            heights,
            offset_x,
            offset_y,
            torch.scalar_tensor(num_keypoints, dtype=torch.int64),
        )
        return xy_preds.permute(0, 2, 1), end_scores

    xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device)
    end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device)
    for i in range(len(rois)):
        roi_map_width = int(widths_ceil[i].item())
        roi_map_height = int(heights_ceil[i].item())
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        roi_map = F.interpolate(
            maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
        )[:, 0]
        # roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)

        x_int = pos % w
        y_int = torch.div(pos - x_int, w, rounding_mode="floor")
        # assert (roi_map_probs[k, y_int, x_int] ==
        #         roi_map_probs[k, :, :].max())
        x = (x_int.float() + 0.5) * width_correction
        y = (y_int.float() + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[torch.arange(num_keypoints, device=roi_map.device), y_int, x_int]

    return xy_preds.permute(0, 2, 1), end_scores
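
# Sketch (assumed inputs): decode 4-keypoint heatmaps for two ROIs back to
# image coordinates.
#
#     maps = torch.randn(2, 4, 56, 56)
#     rois = torch.tensor([[0.0, 0.0, 112.0, 112.0], [50.0, 50.0, 150.0, 150.0]])
#     xy_preds, scores = heatmaps_to_keypoints(maps, rois)
#     # xy_preds: [2, 4, 3] as (x, y, 1); scores: [2, 4]
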
def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
    # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
    N, K, H, W = keypoint_logits.shape
    if H != W:
        raise ValueError(
            f"keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
        )
    discretization_size = H
    heatmaps = []
    valid = []
    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs):
        kp = gt_kp_in_image[midx]
        heatmaps_per_image, valid_per_image = keypoints_to_heatmap(kp, proposals_per_image, discretization_size)
        heatmaps.append(heatmaps_per_image.view(-1))
        valid.append(valid_per_image.view(-1))
    keypoint_targets = torch.cat(heatmaps, dim=0)
    valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
    valid = torch.where(valid)[0]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0

    keypoint_logits = keypoint_logits.view(N * K, H * W)

    keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
    return keypoint_loss
def keypointrcnn_inference(x, boxes):
    # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
    # (the type comment must directly follow the signature for torchscript)
    # print(f'x:{x.shape}')
    kp_probs = []
    kp_scores = []
    boxes_per_image = [box.size(0) for box in boxes]
    x2 = x.split(boxes_per_image, dim=0)
    # print(f'x2:{x2}')
    for xx, bb in zip(x2, boxes):
        kp_prob, scores = heatmaps_to_keypoints(xx, bb)
        kp_probs.append(kp_prob)
        kp_scores.append(scores)
    return kp_probs, kp_scores
def _onnx_expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half = w_half.to(dtype=torch.float32) * scale
    h_half = h_half.to(dtype=torch.float32) * scale

    boxes_exp0 = x_c - w_half
    boxes_exp1 = y_c - h_half
    boxes_exp2 = x_c + w_half
    boxes_exp3 = y_c + h_half
    boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
    return boxes_exp
# the next two functions should be merged inside Masker
# but are kept here for the moment while we need them
# temporarily for paste_mask_in_image
def expand_boxes(boxes, scale):
    # type: (Tensor, float) -> Tensor
    if torchvision._is_tracing():
        return _onnx_expand_boxes(boxes, scale)
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5

    w_half *= scale
    h_half *= scale

    boxes_exp = torch.zeros_like(boxes)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp
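
# Worked example (assumed values): scaling a 10x10 box about its center by
# 1.2 grows each half-side from 5 to 6 pixels.
#
#     boxes = torch.tensor([[10.0, 10.0, 20.0, 20.0]])
#     expand_boxes(boxes, 1.2)  # tensor([[9., 9., 21., 21.]])
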
@torch.jit.unused
def expand_masks_tracing_scale(M, padding):
    # type: (int, int) -> float
    return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)


def expand_masks(mask, padding):
    # type: (Tensor, int) -> Tuple[Tensor, float]
    M = mask.shape[-1]
    if torch._C._get_tracing_state():  # could not import is_tracing(), not sure why
        scale = expand_masks_tracing_scale(M, padding)
    else:
        scale = float(M + 2 * padding) / M
    padded_mask = F.pad(mask, (padding,) * 4)
    return padded_mask, scale
def paste_mask_in_image(mask, box, im_h, im_w):
    # type: (Tensor, Tensor, int, int) -> Tensor
    TO_REMOVE = 1
    w = int(box[2] - box[0] + TO_REMOVE)
    h = int(box[3] - box[1] + TO_REMOVE)
    w = max(w, 1)
    h = max(h, 1)

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, -1, -1))

    # Resize mask
    mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
    x_0 = max(box[0], 0)
    x_1 = min(box[2] + 1, im_w)
    y_0 = max(box[1], 0)
    y_1 = min(box[3] + 1, im_h)

    im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]
    return im_mask
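
# Sketch (assumed inputs): paste a 28x28 mask into an empty 100x100 image at
# an integer box.
#
#     mask = torch.rand(28, 28)
#     box = torch.tensor([10, 20, 40, 60], dtype=torch.int64)
#     im_mask = paste_mask_in_image(mask, box, im_h=100, im_w=100)  # [100, 100]
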
def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
    one = torch.ones(1, dtype=torch.int64)
    zero = torch.zeros(1, dtype=torch.int64)

    w = box[2] - box[0] + one
    h = box[3] - box[1] + one
    w = torch.max(torch.cat((w, one)))
    h = torch.max(torch.cat((h, one)))

    # Set shape to [batchxCxHxW]
    mask = mask.expand((1, 1, mask.size(0), mask.size(1)))

    # Resize mask
    mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
    mask = mask[0][0]

    x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
    x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
    y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
    y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))

    unpadded_im_mask = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]

    # TODO: replace below with a dynamic padding when support is added in ONNX
    # pad y
    zeros_y0 = torch.zeros(y_0, unpadded_im_mask.size(1))
    zeros_y1 = torch.zeros(im_h - y_1, unpadded_im_mask.size(1))
    concat_0 = torch.cat((zeros_y0, unpadded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]
    # pad x
    zeros_x0 = torch.zeros(concat_0.size(0), x_0)
    zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
    im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]
    return im_mask
@torch.jit._script_if_tracing
def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):
    res_append = torch.zeros(0, im_h, im_w)
    for i in range(masks.size(0)):
        mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
        mask_res = mask_res.unsqueeze(0)
        res_append = torch.cat((res_append, mask_res))
    return res_append
def paste_masks_in_image(masks, boxes, img_shape, padding=1):
    # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
    masks, scale = expand_masks(masks, padding=padding)
    boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
    im_h, im_w = img_shape

    if torchvision._is_tracing():
        return _onnx_paste_masks_in_image_loop(
            masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)
        )[:, None]
    res = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]
    if len(res) > 0:
        ret = torch.stack(res, dim=0)[:, None]
    else:
        ret = masks.new_empty((0, 1, im_h, im_w))
    return ret
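
# Sketch (assumed shapes): the batched variant over N masks of shape
# [N, 1, 28, 28] returns [N, 1, im_h, im_w].
#
#     masks = torch.rand(3, 1, 28, 28)
#     boxes = torch.tensor([[10.0, 10.0, 50.0, 50.0]] * 3)
#     out = paste_masks_in_image(masks, boxes, img_shape=(100, 100))  # [3, 1, 100, 100]
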
class RoIHeads(nn.Module):
    __annotations__ = {
        "box_coder": det_utils.BoxCoder,
        "proposal_matcher": det_utils.Matcher,
        "fg_bg_sampler": det_utils.BalancedPositiveNegativeSampler,
    }

    def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
        wirepoint_roi_pool=None,
        wirepoint_head=None,
        wirepoint_predictor=None,
    ):
        super().__init__()

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

        self.wirepoint_roi_pool = wirepoint_roi_pool
        self.wirepoint_head = wirepoint_head
        self.wirepoint_predictor = wirepoint_predictor

    def has_mask(self):
        if self.mask_roi_pool is None:
            return False
        if self.mask_head is None:
            return False
        if self.mask_predictor is None:
            return False
        return True

    def has_keypoint(self):
        if self.keypoint_roi_pool is None:
            return False
        if self.keypoint_head is None:
            return False
        if self.keypoint_predictor is None:
            return False
        return True

    def has_wirepoint(self):
        if self.wirepoint_roi_pool is None:
            print('wirepoint_roi_pool is None')
            return False
        if self.wirepoint_head is None:
            print('wirepoint_head is None')
            return False
        if self.wirepoint_predictor is None:
            print('wirepoint_predictor is None')
            return False
        return True
    def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
        # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
        matched_idxs = []
        labels = []
        for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
            if gt_boxes_in_image.numel() == 0:
                # Background image
                device = proposals_in_image.device
                clamped_matched_idxs_in_image = torch.zeros(
                    (proposals_in_image.shape[0],), dtype=torch.int64, device=device
                )
                labels_in_image = torch.zeros((proposals_in_image.shape[0],), dtype=torch.int64, device=device)
            else:
                # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
                match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)
                matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)

                clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)

                labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
                labels_in_image = labels_in_image.to(dtype=torch.int64)

                # Label background (below the low threshold)
                bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD
                labels_in_image[bg_inds] = 0

                # Label ignore proposals (between low and high thresholds)
                ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS
                labels_in_image[ignore_inds] = -1  # -1 is ignored by sampler

            matched_idxs.append(clamped_matched_idxs_in_image)
            labels.append(labels_in_image)
        return matched_idxs, labels

    def subsample(self, labels):
        # type: (List[Tensor]) -> List[Tensor]
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_inds = []
        for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):
            img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]
            sampled_inds.append(img_sampled_inds)
        return sampled_inds

    def add_gt_proposals(self, proposals, gt_boxes):
        # type: (List[Tensor], List[Tensor]) -> List[Tensor]
        proposals = [torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes)]
        return proposals

    def check_targets(self, targets):
        # type: (Optional[List[Dict[str, Tensor]]]) -> None
        if targets is None:
            raise ValueError("targets should not be None")
        if not all(["boxes" in t for t in targets]):
            raise ValueError("Every element of targets should have a boxes key")
        if not all(["labels" in t for t in targets]):
            raise ValueError("Every element of targets should have a labels key")
        if self.has_mask():
            if not all(["masks" in t for t in targets]):
                raise ValueError("Every element of targets should have a masks key")
    def select_training_samples(
        self,
        proposals,  # type: List[Tensor]
        targets,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
        self.check_targets(targets)
        if targets is None:
            raise ValueError("targets should not be None")
        dtype = proposals[0].dtype
        device = proposals[0].device

        gt_boxes = [t["boxes"].to(dtype) for t in targets]
        gt_labels = [t["labels"] for t in targets]

        # append ground-truth bboxes to proposals
        proposals = self.add_gt_proposals(proposals, gt_boxes)

        # get matching gt indices for each proposal
        matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
        # sample a fixed proportion of positive-negative proposals
        sampled_inds = self.subsample(labels)
        matched_gt_boxes = []
        num_images = len(proposals)
        for img_id in range(num_images):
            img_sampled_inds = sampled_inds[img_id]
            proposals[img_id] = proposals[img_id][img_sampled_inds]
            labels[img_id] = labels[img_id][img_sampled_inds]
            matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]

            gt_boxes_in_image = gt_boxes[img_id]
            if gt_boxes_in_image.numel() == 0:
                gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
            matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])

        regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
        return proposals, matched_idxs, labels, regression_targets
    def postprocess_detections(
        self,
        class_logits,  # type: Tensor
        box_regression,  # type: Tensor
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[: self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
    def forward(
        self,
        features,  # type: Dict[str, Tensor]
        proposals,  # type: List[Tensor]
        image_shapes,  # type: List[Tuple[int, int]]
        targets=None,  # type: Optional[List[Dict[str, Tensor]]]
    ):
        # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
        """
        Args:
            features (List[Tensor])
            proposals (List[Tensor[N, 4]])
            image_shapes (List[Tuple[H, W]])
            targets (List[Dict])
        """
        if targets is not None:
            for t in targets:
                # TODO: https://github.com/pytorch/pytorch/issues/26731
                floating_point_types = (torch.float, torch.double, torch.half)
                if t["boxes"].dtype not in floating_point_types:
                    raise TypeError(f"target boxes must be of float type, instead got {t['boxes'].dtype}")
                if t["labels"].dtype != torch.int64:
                    raise TypeError(f"target labels must be of int64 type, instead got {t['labels'].dtype}")
                if self.has_keypoint():
                    if t["keypoints"].dtype != torch.float32:
                        raise TypeError(f"target keypoints must be of float type, instead got {t['keypoints'].dtype}")

        if self.training:
            proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
        else:
            labels = None
            regression_targets = None
            matched_idxs = None

        box_features = self.box_roi_pool(features, proposals, image_shapes)
        box_features = self.box_head(box_features)
        class_logits, box_regression = self.box_predictor(box_features)

        result: List[Dict[str, torch.Tensor]] = []
        losses = {}
        if self.training:
            if labels is None:
                raise ValueError("labels cannot be None")
            if regression_targets is None:
                raise ValueError("regression_targets cannot be None")
            loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
            losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
        else:
            boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
            num_images = len(boxes)
            for i in range(num_images):
                result.append(
                    {
                        "boxes": boxes[i],
                        "labels": labels[i],
                        "scores": scores[i],
                    }
                )

        if self.has_mask():
            mask_proposals = [p["boxes"] for p in result]
            if self.training:
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                # during training, only focus on positive boxes
                num_images = len(proposals)
                mask_proposals = []
                pos_matched_idxs = []
                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    mask_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            if self.mask_roi_pool is not None:
                mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
                mask_features = self.mask_head(mask_features)
                mask_logits = self.mask_predictor(mask_features)
            else:
                raise Exception("Expected mask_roi_pool to be not None")

            loss_mask = {}
            if self.training:
                if targets is None or pos_matched_idxs is None or mask_logits is None:
                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")

                gt_masks = [t["masks"] for t in targets]
                gt_labels = [t["labels"] for t in targets]
                rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
                loss_mask = {"loss_mask": rcnn_loss_mask}
            else:
                labels = [r["labels"] for r in result]
                masks_probs = maskrcnn_inference(mask_logits, labels)
                for mask_prob, r in zip(masks_probs, result):
                    r["masks"] = mask_prob

            losses.update(loss_mask)

        # keep none checks in if conditional so torchscript will conditionally
        # compile each branch
        if self.has_keypoint():
            keypoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                keypoint_proposals = []
                pos_matched_idxs = []
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    keypoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            keypoint_features = self.keypoint_roi_pool(features, keypoint_proposals, image_shapes)
            # print(f'keypoint_features from roi_pool:{keypoint_features.shape}')
            keypoint_features = self.keypoint_head(keypoint_features)
            # print(f'keypoint_features:{keypoint_features.shape}')
            # debug visualization only; do not leave enabled in the forward pass
            # tmp = keypoint_features[0][0]
            # plt.imshow(tmp.detach().numpy())
            keypoint_logits = self.keypoint_predictor(keypoint_features)
            # print(f'keypoint_logits:{keypoint_logits.shape}')
            # wirenet attaches here

            loss_keypoint = {}
            if self.training:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")

                gt_keypoints = [t["keypoints"] for t in targets]
                rcnn_loss_keypoint = keypointrcnn_loss(
                    keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs
                )
                loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
            else:
                if keypoint_logits is None or keypoint_proposals is None:
                    raise ValueError(
                        "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
                    )

                keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
                for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                    r["keypoints"] = keypoint_prob
                    r["keypoints_scores"] = kps
            losses.update(loss_keypoint)

        if self.has_wirepoint():
            # print(f'wirepoint result:{result}')
            wirepoint_proposals = [p["boxes"] for p in result]
            if self.training:
                # during training, only focus on positive boxes
                num_images = len(proposals)
                wirepoint_proposals = []
                pos_matched_idxs = []
                if matched_idxs is None:
                    raise ValueError("if in training, matched_idxs should not be None")

                for img_id in range(num_images):
                    pos = torch.where(labels[img_id] > 0)[0]
                    wirepoint_proposals.append(proposals[img_id][pos])
                    pos_matched_idxs.append(matched_idxs[img_id][pos])
            else:
                pos_matched_idxs = None

            # print(f'proposals:{len(proposals)}')
            # print(f'wirepoint_proposals:{wirepoint_proposals}')
            wirepoint_features = self.wirepoint_roi_pool(features, wirepoint_proposals, image_shapes)
            # print(f'wirepoint_features from roi_pool:{wirepoint_features.shape}')
            outputs, wirepoint_features = self.wirepoint_head(wirepoint_features)
            # print(f'outputs from head:{outputs.shape}')
            outputs = merge_features(outputs, wirepoint_proposals)
            # wirepoint_features = merge_features(wirepoint_features, wirepoint_proposals)
            # print(f'outputs:{outputs.shape}')
            wirepoint_logits = self.wirepoint_predictor(inputs=outputs, features=wirepoint_features, targets=targets)
            x, y, idx, jcs, n_batch, ps, n_out_line, n_out_junc = wirepoint_logits

            if self.training:
                if targets is None or pos_matched_idxs is None:
                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")

                loss_weight = {'junc_map': 8.0, 'line_map': 0.5, 'junc_offset': 0.25, 'lpos': 1, 'lneg': 1}
                rcnn_loss_wirepoint = wirepoint_head_line_loss(targets, outputs, x, y, idx, loss_weight)
                loss_wirepoint = {"loss_wirepoint": rcnn_loss_wirepoint}
            else:
                pred = wirepoint_inference(x, idx, jcs, n_batch, ps, n_out_line, n_out_junc)
                result.append(pred)
                loss_wirepoint = {}

            losses.update(loss_wirepoint)

        return result, losses
# def merge_features(features, proposals):
#     # suppose roi_pool_features is the input tensor of shape [600, 256, 128, 128]
#
#     # split `features` according to the number of proposals per image
#     proposals_count = sum([p.size(0) for p in proposals])
#     features_size = features.size(0)
#     # print(f'proposals sum:{proposals_count},features batch:{features.size(0)}')
#     if proposals_count != features_size:
#         raise ValueError("The length of proposals must match the batch size of features.")
#
#     split_features = []
#     start_idx = 0
#     print(f"proposals:{proposals}")
#     for proposal in proposals:
#         # take the features of the current image
#         current_features = features[start_idx:start_idx + proposal.size(0)]
#         # print(f'current_features:{current_features.shape}')
#         split_features.append(current_features)
#         start_idx += 1  # NOTE: bug in this old version; should advance by proposal.size(0)
#
#     features_imgs = []
#     for features_per_img in split_features:
#         features_per_img, _ = torch.max(features_per_img, dim=0, keepdim=True)
#         features_imgs.append(features_per_img)
#
#     merged_features = torch.cat(features_imgs, dim=0)
#     # print(f'merged_features:{merged_features.shape}')
#     return merged_features

def merge_features(features, proposals):
    # print(f'features in merge_features:{features.shape}')
    # print(f'proposals:{len(proposals)}')

    def diagnose_input(features, proposals):
        """Print diagnostics for the input data."""
        print("Input Diagnostics:")
        print(f"Features type: {type(features)}, shape: {features.shape}")
        print(f"Proposals type: {type(proposals)}, length: {len(proposals)}")
        for i, p in enumerate(proposals):
            print(f"Proposal {i} shape: {p.shape}")

    def validate_inputs(features, proposals):
        """Validate that the inputs are consistent."""
        if features is None or proposals is None:
            raise ValueError("Features or proposals cannot be None")
        proposals_count = sum([p.size(0) for p in proposals])
        features_size = features.size(0)
        if proposals_count != features_size:
            raise ValueError(
                f"Proposals count ({proposals_count}) must match features batch size ({features_size})"
            )

    def safe_max_reduction(features_per_img):
        """Max-reduce the per-image features without crashing on empty input."""
        if features_per_img.numel() == 0:
            return torch.zeros_like(features_per_img).unsqueeze(0)
        try:
            # max over dim 0, keeping the dimension
            max_features, _ = torch.max(features_per_img, dim=0, keepdim=True)
            return max_features
        except Exception as e:
            print(f"Max reduction error: {e}")
            return features_per_img.unsqueeze(0)

    try:
        # diagnose the input (optional)
        # diagnose_input(features, proposals)

        # validate the input
        validate_inputs(features, proposals)

        # split the pooled features by image
        split_features = []
        start_idx = 0
        for proposal in proposals:
            # take the features belonging to the current image
            current_features = features[start_idx:start_idx + proposal.size(0)]
            split_features.append(current_features)
            start_idx += proposal.size(0)

        # reduce each image's features to a single map
        features_imgs = []
        for features_per_img in split_features:
            compressed_features = safe_max_reduction(features_per_img)
            features_imgs.append(compressed_features)

        # concatenate the per-image features
        merged_features = torch.cat(features_imgs, dim=0)
        return merged_features

    except Exception as e:
        print(f"Error in merge_features: {e}")
        # fall back to the original, unmerged features
        return features
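
# Hedged usage sketch (shapes assumed): 5 pooled ROI feature maps belonging
# to two images (3 + 2 proposals) are reduced to one map per image.
#
#     feats = torch.randn(5, 256, 128, 128)
#     props = [torch.randn(3, 4), torch.randn(2, 4)]
#     merged = merge_features(feats, props)  # [2, 256, 128, 128]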