loi_heads.py 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766
  1. from typing import Dict, List, Optional, Tuple
  2. import matplotlib.pyplot as plt
  3. import torch
  4. import torch.nn.functional as F
  5. import torchvision
  6. # from scipy.optimize import linear_sum_assignment
  7. from torch import nn, Tensor
  8. from libs.vision_libs.ops import boxes as box_ops, roi_align
  9. import libs.vision_libs.models.detection._utils as det_utils
  10. from collections import OrderedDict
  11. def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
  12. # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
  13. """
  14. Computes the loss for Faster R-CNN.
  15. Args:
  16. class_logits (Tensor)
  17. box_regression (Tensor)
  18. labels (list[BoxList])
  19. regression_targets (Tensor)
  20. Returns:
  21. classification_loss (Tensor)
  22. box_loss (Tensor)
  23. """
  24. # print(f'compute fastrcnn_loss:{labels}')
  25. labels = torch.cat(labels, dim=0)
  26. regression_targets = torch.cat(regression_targets, dim=0)
  27. classification_loss = F.cross_entropy(class_logits, labels)
  28. # get indices that correspond to the regression targets for
  29. # the corresponding ground truth labels, to be used with
  30. # advanced indexing
  31. sampled_pos_inds_subset = torch.where(labels > 0)[0]
  32. labels_pos = labels[sampled_pos_inds_subset]
  33. N, num_classes = class_logits.shape
  34. box_regression = box_regression.reshape(N, box_regression.size(-1) // 4, 4)
  35. box_loss = F.smooth_l1_loss(
  36. box_regression[sampled_pos_inds_subset, labels_pos],
  37. regression_targets[sampled_pos_inds_subset],
  38. beta=1 / 9,
  39. reduction="sum",
  40. )
  41. box_loss = box_loss / labels.numel()
  42. return classification_loss, box_loss
  43. def maskrcnn_inference(x, labels):
  44. # type: (Tensor, List[Tensor]) -> List[Tensor]
  45. """
  46. From the results of the CNN, post process the masks
  47. by taking the mask corresponding to the class with max
  48. probability (which are of fixed size and directly output
  49. by the CNN) and return the masks in the mask field of the BoxList.
  50. Args:
  51. x (Tensor): the mask logits
  52. labels (list[BoxList]): bounding boxes that are used as
  53. reference, one for ech image
  54. Returns:
  55. results (list[BoxList]): one BoxList for each image, containing
  56. the extra field mask
  57. """
  58. mask_prob = x.sigmoid()
  59. # select masks corresponding to the predicted classes
  60. num_masks = x.shape[0]
  61. boxes_per_image = [label.shape[0] for label in labels]
  62. labels = torch.cat(labels)
  63. index = torch.arange(num_masks, device=labels.device)
  64. mask_prob = mask_prob[index, labels][:, None]
  65. mask_prob = mask_prob.split(boxes_per_image, dim=0)
  66. return mask_prob
  67. def project_masks_on_boxes(gt_masks, boxes, matched_idxs, M):
  68. # type: (Tensor, Tensor, Tensor, int) -> Tensor
  69. """
  70. Given segmentation masks and the bounding boxes corresponding
  71. to the location of the masks in the image, this function
  72. crops and resizes the masks in the position defined by the
  73. boxes. This prepares the masks for them to be fed to the
  74. loss computation as the targets.
  75. """
  76. matched_idxs = matched_idxs.to(boxes)
  77. rois = torch.cat([matched_idxs[:, None], boxes], dim=1)
  78. gt_masks = gt_masks[:, None].to(rois)
  79. return roi_align(gt_masks, rois, (M, M), 1.0)[:, 0]
  80. def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs):
  81. # type: (Tensor, List[Tensor], List[Tensor], List[Tensor], List[Tensor]) -> Tensor
  82. """
  83. Args:
  84. proposals (list[BoxList])
  85. mask_logits (Tensor)
  86. targets (list[BoxList])
  87. Return:
  88. mask_loss (Tensor): scalar tensor containing the loss
  89. """
  90. discretization_size = mask_logits.shape[-1]
  91. labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
  92. mask_targets = [
  93. project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
  94. ]
  95. labels = torch.cat(labels, dim=0)
  96. mask_targets = torch.cat(mask_targets, dim=0)
  97. # torch.mean (in binary_cross_entropy_with_logits) doesn't
  98. # accept empty tensors, so handle it separately
  99. if mask_targets.numel() == 0:
  100. return mask_logits.sum() * 0
  101. mask_loss = F.binary_cross_entropy_with_logits(
  102. mask_logits[torch.arange(labels.shape[0], device=labels.device), labels], mask_targets
  103. )
  104. return mask_loss
  105. def normalize_tensor(t):
  106. return (t - t.min()) / (t.max() - t.min() + 1e-6)
  107. def line_length(lines):
  108. """
  109. 计算每条线段的长度
  110. lines: [N, 2, 2] 表示 N 条线段,每条线段由两个点组成
  111. 返回: [N]
  112. """
  113. return torch.norm(lines[:, 1] - lines[:, 0], dim=-1)
  114. def line_direction(lines):
  115. """
  116. 计算每条线段的单位方向向量
  117. lines: [N, 2, 2]
  118. 返回: [N, 2] 单位方向向量
  119. """
  120. vec = lines[:, 1] - lines[:, 0]
  121. return F.normalize(vec, dim=-1)
  122. def angle_loss_cosine(pred_dir, gt_dir):
  123. """
  124. 使用 cosine similarity 计算方向差异
  125. pred_dir: [N, 2]
  126. gt_dir: [N, 2]
  127. 返回: [N]
  128. """
  129. cos_sim = torch.sum(pred_dir * gt_dir, dim=-1).clamp(-1.0, 1.0)
  130. return 1.0 - cos_sim # 或者 torch.acos(cos_sim) / pi 也可
  131. def line_length(lines):
  132. """
  133. 计算每条线段的长度
  134. lines: [N, 2, 2] 表示 N 条线段,每条线段由两个点组成
  135. 返回: [N]
  136. """
  137. return torch.norm(lines[:, 1] - lines[:, 0], dim=-1)
  138. def line_direction(lines):
  139. """
  140. 计算每条线段的单位方向向量
  141. lines: [N, 2, 2]
  142. 返回: [N, 2] 单位方向向量
  143. """
  144. vec = lines[:, 1] - lines[:, 0]
  145. return F.normalize(vec, dim=-1)
  146. def angle_loss_cosine(pred_dir, gt_dir):
  147. """
  148. 使用 cosine similarity 计算方向差异
  149. pred_dir: [N, 2]
  150. gt_dir: [N, 2]
  151. 返回: [N]
  152. """
  153. cos_sim = torch.sum(pred_dir * gt_dir, dim=-1).clamp(-1.0, 1.0)
  154. return 1.0 - cos_sim # 或者 torch.acos(cos_sim) / pi 也可
  155. def single_point_to_heatmap(keypoints, rois, heatmap_size):
  156. # type: (Tensor, Tensor, int) -> Tensor
  157. print(f'rois:{rois.shape}')
  158. print(f'heatmap_size:{heatmap_size}')
  159. print(f'keypoints.shape:{keypoints.shape}')
  160. # batch_size, num_keypoints, _ = keypoints.shape
  161. x = keypoints[..., 0].unsqueeze(1)
  162. y = keypoints[..., 1].unsqueeze(1)
  163. gs = generate_gaussian_heatmaps(x, y,num_points=1, heatmap_size=heatmap_size, sigma=1.0)
  164. # show_heatmap(gs[0],'target')
  165. all_roi_heatmap = []
  166. for roi, heatmap in zip(rois, gs):
  167. # show_heatmap(heatmap, 'target')
  168. # print(f'heatmap:{heatmap.shape}')
  169. heatmap = heatmap.unsqueeze(0)
  170. x1, y1, x2, y2 = map(int, roi)
  171. roi_heatmap = torch.zeros_like(heatmap)
  172. roi_heatmap[..., y1:y2 + 1, x1:x2 + 1] = heatmap[..., y1:y2 + 1, x1:x2 + 1]
  173. # show_heatmap(roi_heatmap[0],'roi_heatmap')
  174. all_roi_heatmap.append(roi_heatmap)
  175. all_roi_heatmap = torch.cat(all_roi_heatmap)
  176. print(f'all_roi_heatmap:{all_roi_heatmap.shape}')
  177. return all_roi_heatmap
  178. def line_points_to_heatmap(keypoints, rois, heatmap_size):
  179. # type: (Tensor, Tensor, int) -> Tensor
  180. print(f'rois:{rois.shape}')
  181. print(f'heatmap_size:{heatmap_size}')
  182. print(f'keypoints.shape:{keypoints.shape}')
  183. # batch_size, num_keypoints, _ = keypoints.shape
  184. x = keypoints[..., 0]
  185. y = keypoints[..., 1]
  186. gs = generate_gaussian_heatmaps(x, y, heatmap_size, 1.0)
  187. # show_heatmap(gs[0],'target')
  188. all_roi_heatmap = []
  189. for roi, heatmap in zip(rois, gs):
  190. # print(f'heatmap:{heatmap.shape}')
  191. heatmap = heatmap.unsqueeze(0)
  192. x1, y1, x2, y2 = map(int, roi)
  193. roi_heatmap = torch.zeros_like(heatmap)
  194. roi_heatmap[..., y1:y2 + 1, x1:x2 + 1] = heatmap[..., y1:y2 + 1, x1:x2 + 1]
  195. # show_heatmap(roi_heatmap,'roi_heatmap')
  196. all_roi_heatmap.append(roi_heatmap)
  197. all_roi_heatmap = torch.cat(all_roi_heatmap)
  198. print(f'all_roi_heatmap:{all_roi_heatmap.shape}')
  199. return all_roi_heatmap
  200. """
  201. 修改适配的原结构的点 转热图,适用于带roi_pool版本的
  202. """
  203. def line_points_to_heatmap_(keypoints, rois, heatmap_size):
  204. # type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
  205. print(f'rois:{rois.shape}')
  206. print(f'heatmap_size:{heatmap_size}')
  207. offset_x = rois[:, 0]
  208. offset_y = rois[:, 1]
  209. scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
  210. scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])
  211. offset_x = offset_x[:, None]
  212. offset_y = offset_y[:, None]
  213. scale_x = scale_x[:, None]
  214. scale_y = scale_y[:, None]
  215. print(f'keypoints.shape:{keypoints.shape}')
  216. # batch_size, num_keypoints, _ = keypoints.shape
  217. x = keypoints[..., 0]
  218. y = keypoints[..., 1]
  219. # gs=generate_gaussian_heatmaps(x,y,512,1.0)
  220. # print(f'gs_heatmap shape:{gs.shape}')
  221. #
  222. # show_heatmap(gs[0],'target')
  223. x_boundary_inds = x == rois[:, 2][:, None]
  224. y_boundary_inds = y == rois[:, 3][:, None]
  225. x = (x - offset_x) * scale_x
  226. x = x.floor().long()
  227. y = (y - offset_y) * scale_y
  228. y = y.floor().long()
  229. x[x_boundary_inds] = heatmap_size - 1
  230. y[y_boundary_inds] = heatmap_size - 1
  231. # print(f'heatmaps x:{x}')
  232. # print(f'heatmaps y:{y}')
  233. valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
  234. vis = keypoints[..., 2] > 0
  235. valid = (valid_loc & vis).long()
  236. gs_heatmap = generate_gaussian_heatmaps(x, y, heatmap_size, 1.0)
  237. show_heatmap(gs_heatmap[0], 'feature')
  238. # print(f'gs_heatmap:{gs_heatmap.shape}')
  239. #
  240. # lin_ind = y * heatmap_size + x
  241. # print(f'lin_ind:{lin_ind.shape}')
  242. # heatmaps = lin_ind * valid
  243. return gs_heatmap
  244. def generate_gaussian_heatmaps(xs, ys, heatmap_size,num_points=2, sigma=2.0, device='cuda'):
  245. """
  246. 为一组点生成并合并高斯热图。
  247. Args:
  248. xs (Tensor): 形状为 (N, 2) 的所有点的 x 坐标
  249. ys (Tensor): 形状为 (N, 2) 的所有点的 y 坐标
  250. heatmap_size (int): 热图大小 H=W
  251. sigma (float): 高斯核标准差
  252. device (str): 设备类型 ('cpu' or 'cuda')
  253. Returns:
  254. Tensor: 形状为 (H, W) 的合并后的热图
  255. """
  256. assert xs.shape == ys.shape, "x and y must have the same shape"
  257. print(f'xs:{xs.shape}')
  258. N = xs.shape[0]
  259. print(f'N:{N},num_points:{num_points}')
  260. # 创建网格
  261. grid_y, grid_x = torch.meshgrid(
  262. torch.arange(heatmap_size, device=device),
  263. torch.arange(heatmap_size, device=device),
  264. indexing='ij'
  265. )
  266. # print(f'heatmap_size:{heatmap_size}')
  267. # 初始化输出热图
  268. combined_heatmap = torch.zeros((N, heatmap_size, heatmap_size), device=device)
  269. for i in range(N):
  270. heatmap= torch.zeros((heatmap_size, heatmap_size), device=device)
  271. for j in range(num_points):
  272. mu_x1 = xs[i, j].clamp(0, heatmap_size - 1).item()
  273. mu_y1 = ys[i, j].clamp(0, heatmap_size - 1).item()
  274. # print(f'mu_x1,mu_y1:{mu_x1},{mu_y1}')
  275. # 计算距离平方
  276. dist1 = (grid_x - mu_x1) ** 2 + (grid_y - mu_y1) ** 2
  277. # 计算高斯分布
  278. heatmap1 = torch.exp(-dist1 / (2 * sigma ** 2))
  279. heatmap+=heatmap1
  280. # mu_x2 = xs[i, 1].clamp(0, heatmap_size - 1).item()
  281. # mu_y2 = ys[i, 1].clamp(0, heatmap_size - 1).item()
  282. #
  283. # # 计算距离平方
  284. # dist2 = (grid_x - mu_x2) ** 2 + (grid_y - mu_y2) ** 2
  285. #
  286. # # 计算高斯分布
  287. # heatmap2 = torch.exp(-dist2 / (2 * sigma ** 2))
  288. #
  289. # heatmap = heatmap1 + heatmap2
  290. # 将当前热图累加到结果中
  291. combined_heatmap[i] = heatmap
  292. return combined_heatmap
  293. # 显示热图的函数
  294. def show_heatmap(heatmap, title="Heatmap"):
  295. """
  296. 使用 matplotlib 显示热图。
  297. Args:
  298. heatmap (Tensor): 要显示的热图张量
  299. title (str): 图表标题
  300. """
  301. # 如果在 GPU 上,首先将其移动到 CPU 并转换为 numpy 数组
  302. if heatmap.is_cuda:
  303. heatmap = heatmap.cpu().numpy()
  304. else:
  305. heatmap = heatmap.numpy()
  306. plt.imshow(heatmap, cmap='hot', interpolation='nearest')
  307. plt.colorbar()
  308. plt.title(title)
  309. plt.show()
  310. def keypoints_to_heatmap(keypoints, rois, heatmap_size):
  311. # type: (Tensor, Tensor, int) -> Tuple[Tensor, Tensor]
  312. offset_x = rois[:, 0]
  313. offset_y = rois[:, 1]
  314. scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
  315. scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])
  316. offset_x = offset_x[:, None]
  317. offset_y = offset_y[:, None]
  318. scale_x = scale_x[:, None]
  319. scale_y = scale_y[:, None]
  320. x = keypoints[..., 0]
  321. y = keypoints[..., 1]
  322. x_boundary_inds = x == rois[:, 2][:, None]
  323. y_boundary_inds = y == rois[:, 3][:, None]
  324. x = (x - offset_x) * scale_x
  325. x = x.floor().long()
  326. y = (y - offset_y) * scale_y
  327. y = y.floor().long()
  328. x[x_boundary_inds] = heatmap_size - 1
  329. y[y_boundary_inds] = heatmap_size - 1
  330. valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
  331. vis = keypoints[..., 2] > 0
  332. valid = (valid_loc & vis).long()
  333. lin_ind = y * heatmap_size + x
  334. heatmaps = lin_ind * valid
  335. return heatmaps, valid
  336. def _onnx_heatmaps_to_keypoints(
  337. maps, maps_i, roi_map_width, roi_map_height, widths_i, heights_i, offset_x_i, offset_y_i
  338. ):
  339. num_keypoints = torch.scalar_tensor(maps.size(1), dtype=torch.int64)
  340. width_correction = widths_i / roi_map_width
  341. height_correction = heights_i / roi_map_height
  342. roi_map = F.interpolate(
  343. maps_i[:, None], size=(int(roi_map_height), int(roi_map_width)), mode="bicubic", align_corners=False
  344. )[:, 0]
  345. w = torch.scalar_tensor(roi_map.size(2), dtype=torch.int64)
  346. pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
  347. x_int = pos % w
  348. y_int = (pos - x_int) // w
  349. x = (torch.tensor(0.5, dtype=torch.float32) + x_int.to(dtype=torch.float32)) * width_correction.to(
  350. dtype=torch.float32
  351. )
  352. y = (torch.tensor(0.5, dtype=torch.float32) + y_int.to(dtype=torch.float32)) * height_correction.to(
  353. dtype=torch.float32
  354. )
  355. xy_preds_i_0 = x + offset_x_i.to(dtype=torch.float32)
  356. xy_preds_i_1 = y + offset_y_i.to(dtype=torch.float32)
  357. xy_preds_i_2 = torch.ones(xy_preds_i_1.shape, dtype=torch.float32)
  358. xy_preds_i = torch.stack(
  359. [
  360. xy_preds_i_0.to(dtype=torch.float32),
  361. xy_preds_i_1.to(dtype=torch.float32),
  362. xy_preds_i_2.to(dtype=torch.float32),
  363. ],
  364. 0,
  365. )
  366. # TODO: simplify when indexing without rank will be supported by ONNX
  367. base = num_keypoints * num_keypoints + num_keypoints + 1
  368. ind = torch.arange(num_keypoints)
  369. ind = ind.to(dtype=torch.int64) * base
  370. end_scores_i = (
  371. roi_map.index_select(1, y_int.to(dtype=torch.int64))
  372. .index_select(2, x_int.to(dtype=torch.int64))
  373. .view(-1)
  374. .index_select(0, ind.to(dtype=torch.int64))
  375. )
  376. return xy_preds_i, end_scores_i
  377. @torch.jit._script_if_tracing
  378. def _onnx_heatmaps_to_keypoints_loop(
  379. maps, rois, widths_ceil, heights_ceil, widths, heights, offset_x, offset_y, num_keypoints
  380. ):
  381. xy_preds = torch.zeros((0, 3, int(num_keypoints)), dtype=torch.float32, device=maps.device)
  382. end_scores = torch.zeros((0, int(num_keypoints)), dtype=torch.float32, device=maps.device)
  383. for i in range(int(rois.size(0))):
  384. xy_preds_i, end_scores_i = _onnx_heatmaps_to_keypoints(
  385. maps, maps[i], widths_ceil[i], heights_ceil[i], widths[i], heights[i], offset_x[i], offset_y[i]
  386. )
  387. xy_preds = torch.cat((xy_preds.to(dtype=torch.float32), xy_preds_i.unsqueeze(0).to(dtype=torch.float32)), 0)
  388. end_scores = torch.cat(
  389. (end_scores.to(dtype=torch.float32), end_scores_i.to(dtype=torch.float32).unsqueeze(0)), 0
  390. )
  391. return xy_preds, end_scores
  392. def heatmaps_to_keypoints(maps, rois):
  393. """Extract predicted keypoint locations from heatmaps. Output has shape
  394. (#rois, 4, #keypoints) with the 4 rows corresponding to (x, y, logit, prob)
  395. for each keypoint.
  396. """
  397. # This function converts a discrete image coordinate in a HEATMAP_SIZE x
  398. # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
  399. # consistency with keypoints_to_heatmap_labels by using the conversion from
  400. # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
  401. # continuous coordinate.
  402. offset_x = rois[:, 0]
  403. offset_y = rois[:, 1]
  404. widths = rois[:, 2] - rois[:, 0]
  405. heights = rois[:, 3] - rois[:, 1]
  406. widths = widths.clamp(min=1)
  407. heights = heights.clamp(min=1)
  408. widths_ceil = widths.ceil()
  409. heights_ceil = heights.ceil()
  410. num_keypoints = maps.shape[1]
  411. if torchvision._is_tracing():
  412. xy_preds, end_scores = _onnx_heatmaps_to_keypoints_loop(
  413. maps,
  414. rois,
  415. widths_ceil,
  416. heights_ceil,
  417. widths,
  418. heights,
  419. offset_x,
  420. offset_y,
  421. torch.scalar_tensor(num_keypoints, dtype=torch.int64),
  422. )
  423. return xy_preds.permute(0, 2, 1), end_scores
  424. xy_preds = torch.zeros((len(rois), 3, num_keypoints), dtype=torch.float32, device=maps.device)
  425. end_scores = torch.zeros((len(rois), num_keypoints), dtype=torch.float32, device=maps.device)
  426. for i in range(len(rois)):
  427. roi_map_width = int(widths_ceil[i].item())
  428. roi_map_height = int(heights_ceil[i].item())
  429. width_correction = widths[i] / roi_map_width
  430. height_correction = heights[i] / roi_map_height
  431. roi_map = F.interpolate(
  432. maps[i][:, None], size=(roi_map_height, roi_map_width), mode="bicubic", align_corners=False
  433. )[:, 0]
  434. # roi_map_probs = scores_to_probs(roi_map.copy())
  435. w = roi_map.shape[2]
  436. pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
  437. x_int = pos % w
  438. y_int = torch.div(pos - x_int, w, rounding_mode="floor")
  439. # assert (roi_map_probs[k, y_int, x_int] ==
  440. # roi_map_probs[k, :, :].max())
  441. x = (x_int.float() + 0.5) * width_correction
  442. y = (y_int.float() + 0.5) * height_correction
  443. xy_preds[i, 0, :] = x + offset_x[i]
  444. xy_preds[i, 1, :] = y + offset_y[i]
  445. xy_preds[i, 2, :] = 1
  446. end_scores[i, :] = roi_map[torch.arange(num_keypoints, device=roi_map.device), y_int, x_int]
  447. return xy_preds.permute(0, 2, 1), end_scores
  448. def non_maximum_suppression(a):
  449. ap = F.max_pool2d(a, 3, stride=1, padding=1)
  450. mask = (a == ap).float().clamp(min=0.0)
  451. return a * mask
  452. def heatmaps_to_points(maps, rois):
  453. point_preds = torch.zeros((len(rois), 2), dtype=torch.float32, device=maps.device)
  454. point_end_scores = torch.zeros((len(rois), 1), dtype=torch.float32, device=maps.device)
  455. print(f'heatmaps_to_lines:{maps.shape}')
  456. point_maps=maps[:,0]
  457. print(f'point_map:{point_maps.shape}')
  458. for i in range(len(rois)):
  459. point_roi_map = point_maps[i].unsqueeze(0)
  460. print(f'point_roi_map:{point_roi_map.shape}')
  461. # roi_map_probs = scores_to_probs(roi_map.copy())
  462. w = point_roi_map.shape[2]
  463. flatten_point_roi_map = non_maximum_suppression(point_roi_map).reshape(1, -1)
  464. point_score, point_index = torch.topk(flatten_point_roi_map, k=1)
  465. print(f'point index:{point_index}')
  466. # pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
  467. point_x =point_index % w
  468. point_y = torch.div(point_index - point_x, w, rounding_mode="floor")
  469. point_preds[i, 0,] = point_x
  470. point_preds[i, 1,] = point_y
  471. point_end_scores[i, :] = point_roi_map[torch.arange(1, device=point_roi_map.device), point_y, point_x]
  472. return point_preds,point_end_scores
  473. def heatmaps_to_lines(maps, rois):
  474. line_preds = torch.zeros((len(rois), 3, 2), dtype=torch.float32, device=maps.device)
  475. line_end_scores = torch.zeros((len(rois), 2), dtype=torch.float32, device=maps.device)
  476. line_maps=maps[:,1]
  477. for i in range(len(rois)):
  478. line_roi_map = line_maps[i].unsqueeze(0)
  479. print(f'line_roi_map:{line_roi_map.shape}')
  480. # roi_map_probs = scores_to_probs(roi_map.copy())
  481. w = line_roi_map.shape[1]
  482. flatten_line_roi_map = non_maximum_suppression(line_roi_map).reshape(1, -1)
  483. line_score, line_index = torch.topk(flatten_line_roi_map, k=2)
  484. print(f'line index:{line_index}')
  485. # pos = roi_map.reshape(num_keypoints, -1).argmax(dim=1)
  486. pos = line_index
  487. line_x = pos % w
  488. line_y = torch.div(pos - line_x, w, rounding_mode="floor")
  489. line_preds[i, 0, :] = line_x
  490. line_preds[i, 1, :] = line_y
  491. line_preds[i, 2, :] = 1
  492. line_end_scores[i, :] = line_roi_map[torch.arange(1, device=line_roi_map.device), line_y, line_x]
  493. return line_preds.permute(0, 2, 1), line_end_scores
  494. def features_align(features, proposals, img_size):
  495. print(f'lines_features_align features:{features.shape},proposals:{len(proposals)}')
  496. align_feat_list = []
  497. for feat, proposals_per_img in zip(features, proposals):
  498. print(f'lines_features_align feat:{feat.shape}, proposals_per_img:{proposals_per_img.shape}')
  499. if proposals_per_img.shape[0]>0:
  500. feat = feat.unsqueeze(0)
  501. for proposal in proposals_per_img:
  502. align_feat = torch.zeros_like(feat)
  503. # print(f'align_feat:{align_feat.shape}')
  504. x1, y1, x2, y2 = map(lambda v: int(v.item()), proposal)
  505. # 将每个proposal框内的部分赋值到align_feats对应位置
  506. align_feat[:, :, y1:y2 + 1, x1:x2 + 1] = feat[:, :, y1:y2 + 1, x1:x2 + 1]
  507. align_feat_list.append(align_feat)
  508. # print(f'align_feat_list:{align_feat_list}')
  509. if len(align_feat_list) > 0:
  510. feats_tensor = torch.cat(align_feat_list)
  511. print(f'align features :{feats_tensor.shape}')
  512. else:
  513. feats_tensor = None
  514. return feats_tensor
  515. def lines_point_pair_loss(line_logits, proposals, gt_lines, line_matched_idxs):
  516. # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
  517. N, K, H, W = line_logits.shape
  518. len_proposals = len(proposals)
  519. print(f'lines_point_pair_loss line_logits.shape:{line_logits.shape},len_proposals:{len_proposals}')
  520. if H != W:
  521. raise ValueError(
  522. f"line_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
  523. )
  524. discretization_size = H
  525. heatmaps = []
  526. gs_heatmaps = []
  527. valid = []
  528. for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_lines, line_matched_idxs):
  529. print(f'line_proposals_per_image:{proposals_per_image.shape}')
  530. print(f'gt_lines:{gt_lines}')
  531. kp = gt_kp_in_image[midx]
  532. gs_heatmaps_per_img = line_points_to_heatmap(kp, proposals_per_image, discretization_size)
  533. gs_heatmaps.append(gs_heatmaps_per_img)
  534. # print(f'heatmaps_per_image:{heatmaps_per_image.shape}')
  535. # heatmaps.append(heatmaps_per_image.view(-1))
  536. # valid.append(valid_per_image.view(-1))
  537. # line_targets = torch.cat(heatmaps, dim=0)
  538. gs_heatmaps = torch.cat(gs_heatmaps, dim=0)
  539. print(f'gs_heatmaps:{gs_heatmaps.shape}, line_logits.shape:{line_logits.squeeze(1).shape}')
  540. # print(f'line_targets:{line_targets.shape},{line_targets}')
  541. # valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
  542. # valid = torch.where(valid)[0]
  543. # print(f' line_targets[valid]:{line_targets[valid]}')
  544. # torch.mean (in binary_cross_entropy_with_logits) doesn't
  545. # accept empty tensors, so handle it sepaartely
  546. # if line_targets.numel() == 0 or len(valid) == 0:
  547. # return line_logits.sum() * 0
  548. # line_logits = line_logits.view(N * K, H * W)
  549. # print(f'line_logits[valid]:{line_logits[valid].shape}')
  550. line_logits = line_logits.squeeze(1)
  551. # line_loss = F.cross_entropy(line_logits[valid], line_targets[valid])
  552. line_loss = F.cross_entropy(line_logits, gs_heatmaps)
  553. return line_loss
  554. def compute_point_loss(line_logits, proposals, gt_points, point_matched_idxs):
  555. # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
  556. N, K, H, W = line_logits.shape
  557. len_proposals = len(proposals)
  558. empty_count = 0
  559. non_empty_count = 0
  560. for prop in proposals:
  561. if prop.shape[0] == 0:
  562. empty_count += 1
  563. else:
  564. non_empty_count += 1
  565. print(f"Empty proposals count: {empty_count}")
  566. print(f"Non-empty proposals count: {non_empty_count}")
  567. print(f'starte to compute_point_loss')
  568. print(f'compute_point_loss line_logits.shape:{line_logits.shape},len_proposals:{len_proposals}')
  569. if H != W:
  570. raise ValueError(
  571. f"line_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
  572. )
  573. discretization_size = H
  574. gs_heatmaps = []
  575. # print(f'point_matched_idxs:{point_matched_idxs}')
  576. for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_points, point_matched_idxs):
  577. print(f'proposals_per_image:{proposals_per_image.shape}')
  578. kp = gt_kp_in_image[midx]
  579. # print(f'gt_kp_in_image:{gt_kp_in_image}')
  580. gs_heatmaps_per_img = single_point_to_heatmap(kp, proposals_per_image, discretization_size)
  581. gs_heatmaps.append(gs_heatmaps_per_img)
  582. gs_heatmaps = torch.cat(gs_heatmaps, dim=0)
  583. print(f'gs_heatmaps:{gs_heatmaps.shape}, line_logits.shape:{line_logits.squeeze(1).shape}')
  584. line_logits = line_logits[:,0]
  585. print(f'single_point_logits:{line_logits.shape}')
  586. line_loss = F.cross_entropy(line_logits, gs_heatmaps)
  587. return line_loss
  588. def lines_to_boxes(lines, img_size=511):
  589. """
  590. 输入:
  591. lines: Tensor of shape (N, 2, 2),表示 N 条线段,每个线段有两个端点 (x, y)
  592. img_size: int,图像尺寸,用于 clamp 边界
  593. 输出:
  594. boxes: Tensor of shape (N, 4),表示 N 个包围盒 [x_min, y_min, x_max, y_max]
  595. """
  596. # 提取所有线段的两个端点
  597. p1 = lines[:, 0] # (N, 2)
  598. p2 = lines[:, 1] # (N, 2)
  599. # 每条线段的 x 和 y 坐标
  600. x_coords = torch.stack([p1[:, 0], p2[:, 0]], dim=1) # (N, 2)
  601. y_coords = torch.stack([p1[:, 1], p2[:, 1]], dim=1) # (N, 2)
  602. # 计算包围盒边界
  603. x_min = x_coords.min(dim=1).values
  604. y_min = y_coords.min(dim=1).values
  605. x_max = x_coords.max(dim=1).values
  606. y_max = y_coords.max(dim=1).values
  607. # 扩展边界并限制在图像范围内
  608. x_min = (x_min - 1).clamp(min=0, max=img_size)
  609. y_min = (y_min - 1).clamp(min=0, max=img_size)
  610. x_max = (x_max + 1).clamp(min=0, max=img_size)
  611. y_max = (y_max + 1).clamp(min=0, max=img_size)
  612. # 合成包围盒
  613. boxes = torch.stack([x_min, y_min, x_max, y_max], dim=1) # (N, 4)
  614. return boxes
  615. def box_iou_pairwise(box1, box2):
  616. """
  617. 输入:
  618. box1: shape (N, 4)
  619. box2: shape (M, 4)
  620. 输出:
  621. ious: shape (min(N, M), ), 只计算 i = j 的配对
  622. """
  623. N = min(len(box1), len(box2))
  624. lt = torch.max(box1[:N, :2], box2[:N, :2]) # 左上角
  625. rb = torch.min(box1[:N, 2:], box2[:N, 2:]) # 右下角
  626. wh = (rb - lt).clamp(min=0) # 宽高
  627. inter_area = wh[:, 0] * wh[:, 1] # 交集面积
  628. area1 = (box1[:N, 2] - box1[:N, 0]) * (box1[:N, 3] - box1[:N, 1])
  629. area2 = (box2[:N, 2] - box2[:N, 0]) * (box2[:N, 3] - box2[:N, 1])
  630. union_area = area1 + area2 - inter_area
  631. ious = inter_area / (union_area + 1e-6)
  632. return ious
  633. def line_iou_loss(x, boxes, gt_lines, matched_idx, img_size=511, alpha=1.0, beta=1.0, gamma=1.0):
  634. """
  635. Args:
  636. x: [N,1,H,W] 热力图
  637. boxes: [N,4] 框坐标
  638. gt_lines: [N,2,3] GT线段(含可见性)
  639. matched_idx: 匹配 index
  640. img_size: 图像尺寸
  641. alpha: IoU 损失权重
  642. beta: 长度损失权重
  643. gamma: 方向角度损失权重
  644. """
  645. losses = []
  646. boxes_per_image = [box.size(0) for box in boxes]
  647. x2 = x.split(boxes_per_image, dim=0)
  648. for xx, bb, gt_line, mid in zip(x2, boxes, gt_lines, matched_idx):
  649. p_prob, _ = heatmaps_to_lines(xx, bb)
  650. pred_lines = p_prob
  651. gt_line_points = gt_line[mid]
  652. if len(pred_lines) == 0 or len(gt_line_points) == 0:
  653. continue
  654. # IoU 损失
  655. pred_boxes = lines_to_boxes(pred_lines, img_size)
  656. gt_boxes = lines_to_boxes(gt_line_points, img_size)
  657. ious = box_iou_pairwise(pred_boxes, gt_boxes)
  658. iou_loss = 1.0 - ious # [N]
  659. # 长度损失
  660. pred_len = line_length(pred_lines)
  661. gt_len = line_length(gt_line_points)
  662. length_diff = F.l1_loss(pred_len, gt_len, reduction='none') # [N]
  663. # 方向角度损失
  664. pred_dir = line_direction(pred_lines)
  665. gt_dir = line_direction(gt_line_points)
  666. ang_loss = angle_loss_cosine(pred_dir, gt_dir) # [N]
  667. # 归一化每一项损失
  668. norm_iou = normalize_tensor(iou_loss)
  669. norm_len = normalize_tensor(length_diff)
  670. norm_ang = normalize_tensor(ang_loss)
  671. total = alpha * norm_iou + beta * norm_len + gamma * norm_ang
  672. losses.append(total)
  673. if not losses:
  674. return None
  675. return torch.mean(torch.cat(losses))
  676. def point_inference(x, point_boxes):
  677. # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
  678. points_probs = []
  679. points_scores = []
  680. boxes_per_image = [box.size(0) for box in point_boxes]
  681. x2 = x.split(boxes_per_image, dim=0)
  682. for xx, bb in zip(x2, point_boxes):
  683. point_prob,point_scores = heatmaps_to_points(xx, bb)
  684. points_probs.append(point_prob.unsqueeze(1))
  685. points_scores.append(point_scores)
  686. return points_probs,points_scores
  687. def line_inference(x, line_boxes):
  688. # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
  689. lines_probs = []
  690. lines_scores = []
  691. boxes_per_image = [box.size(0) for box in line_boxes]
  692. x2 = x.split(boxes_per_image, dim=0)
  693. for xx, bb in zip(x2, line_boxes):
  694. line_prob, line_scores, = heatmaps_to_lines(xx, bb)
  695. lines_probs.append(line_prob)
  696. lines_scores.append(line_scores)
  697. return lines_probs, lines_scores
  698. def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
  699. # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
  700. N, K, H, W = keypoint_logits.shape
  701. if H != W:
  702. raise ValueError(
  703. f"keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
  704. )
  705. discretization_size = H
  706. heatmaps = []
  707. valid = []
  708. for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_keypoints, keypoint_matched_idxs):
  709. kp = gt_kp_in_image[midx]
  710. heatmaps_per_image, valid_per_image = keypoints_to_heatmap(kp, proposals_per_image, discretization_size)
  711. heatmaps.append(heatmaps_per_image.view(-1))
  712. valid.append(valid_per_image.view(-1))
  713. keypoint_targets = torch.cat(heatmaps, dim=0)
  714. valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
  715. valid = torch.where(valid)[0]
  716. # torch.mean (in binary_cross_entropy_with_logits) doesn't
  717. # accept empty tensors, so handle it sepaartely
  718. if keypoint_targets.numel() == 0 or len(valid) == 0:
  719. return keypoint_logits.sum() * 0
  720. keypoint_logits = keypoint_logits.view(N * K, H * W)
  721. keypoint_loss = F.cross_entropy(keypoint_logits[valid], keypoint_targets[valid])
  722. return keypoint_loss
  723. def keypointrcnn_inference(x, boxes):
  724. # type: (Tensor, List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
  725. kp_probs = []
  726. kp_scores = []
  727. boxes_per_image = [box.size(0) for box in boxes]
  728. x2 = x.split(boxes_per_image, dim=0)
  729. for xx, bb in zip(x2, boxes):
  730. kp_prob, scores = heatmaps_to_keypoints(xx, bb)
  731. kp_probs.append(kp_prob)
  732. kp_scores.append(scores)
  733. return kp_probs, kp_scores
  734. def _onnx_expand_boxes(boxes, scale):
  735. # type: (Tensor, float) -> Tensor
  736. w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
  737. h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
  738. x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
  739. y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
  740. w_half = w_half.to(dtype=torch.float32) * scale
  741. h_half = h_half.to(dtype=torch.float32) * scale
  742. boxes_exp0 = x_c - w_half
  743. boxes_exp1 = y_c - h_half
  744. boxes_exp2 = x_c + w_half
  745. boxes_exp3 = y_c + h_half
  746. boxes_exp = torch.stack((boxes_exp0, boxes_exp1, boxes_exp2, boxes_exp3), 1)
  747. return boxes_exp
  748. # the next two functions should be merged inside Masker
  749. # but are kept here for the moment while we need them
  750. # temporarily for paste_mask_in_image
  751. def expand_boxes(boxes, scale):
  752. # type: (Tensor, float) -> Tensor
  753. if torchvision._is_tracing():
  754. return _onnx_expand_boxes(boxes, scale)
  755. w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5
  756. h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5
  757. x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
  758. y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
  759. w_half *= scale
  760. h_half *= scale
  761. boxes_exp = torch.zeros_like(boxes)
  762. boxes_exp[:, 0] = x_c - w_half
  763. boxes_exp[:, 2] = x_c + w_half
  764. boxes_exp[:, 1] = y_c - h_half
  765. boxes_exp[:, 3] = y_c + h_half
  766. return boxes_exp
  767. @torch.jit.unused
  768. def expand_masks_tracing_scale(M, padding):
  769. # type: (int, int) -> float
  770. return torch.tensor(M + 2 * padding).to(torch.float32) / torch.tensor(M).to(torch.float32)
  771. def expand_masks(mask, padding):
  772. # type: (Tensor, int) -> Tuple[Tensor, float]
  773. M = mask.shape[-1]
  774. if torch._C._get_tracing_state(): # could not import is_tracing(), not sure why
  775. scale = expand_masks_tracing_scale(M, padding)
  776. else:
  777. scale = float(M + 2 * padding) / M
  778. padded_mask = F.pad(mask, (padding,) * 4)
  779. return padded_mask, scale
  780. def paste_mask_in_image(mask, box, im_h, im_w):
  781. # type: (Tensor, Tensor, int, int) -> Tensor
  782. TO_REMOVE = 1
  783. w = int(box[2] - box[0] + TO_REMOVE)
  784. h = int(box[3] - box[1] + TO_REMOVE)
  785. w = max(w, 1)
  786. h = max(h, 1)
  787. # Set shape to [batchxCxHxW]
  788. mask = mask.expand((1, 1, -1, -1))
  789. # Resize mask
  790. mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
  791. mask = mask[0][0]
  792. im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
  793. x_0 = max(box[0], 0)
  794. x_1 = min(box[2] + 1, im_w)
  795. y_0 = max(box[1], 0)
  796. y_1 = min(box[3] + 1, im_h)
  797. im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]
  798. return im_mask
  799. def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
  800. one = torch.ones(1, dtype=torch.int64)
  801. zero = torch.zeros(1, dtype=torch.int64)
  802. w = box[2] - box[0] + one
  803. h = box[3] - box[1] + one
  804. w = torch.max(torch.cat((w, one)))
  805. h = torch.max(torch.cat((h, one)))
  806. # Set shape to [batchxCxHxW]
  807. mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
  808. # Resize mask
  809. mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
  810. mask = mask[0][0]
  811. x_0 = torch.max(torch.cat((box[0].unsqueeze(0), zero)))
  812. x_1 = torch.min(torch.cat((box[2].unsqueeze(0) + one, im_w.unsqueeze(0))))
  813. y_0 = torch.max(torch.cat((box[1].unsqueeze(0), zero)))
  814. y_1 = torch.min(torch.cat((box[3].unsqueeze(0) + one, im_h.unsqueeze(0))))
  815. unpaded_im_mask = mask[(y_0 - box[1]): (y_1 - box[1]), (x_0 - box[0]): (x_1 - box[0])]
  816. # TODO : replace below with a dynamic padding when support is added in ONNX
  817. # pad y
  818. zeros_y0 = torch.zeros(y_0, unpaded_im_mask.size(1))
  819. zeros_y1 = torch.zeros(im_h - y_1, unpaded_im_mask.size(1))
  820. concat_0 = torch.cat((zeros_y0, unpaded_im_mask.to(dtype=torch.float32), zeros_y1), 0)[0:im_h, :]
  821. # pad x
  822. zeros_x0 = torch.zeros(concat_0.size(0), x_0)
  823. zeros_x1 = torch.zeros(concat_0.size(0), im_w - x_1)
  824. im_mask = torch.cat((zeros_x0, concat_0, zeros_x1), 1)[:, :im_w]
  825. return im_mask
  826. @torch.jit._script_if_tracing
  827. def _onnx_paste_masks_in_image_loop(masks, boxes, im_h, im_w):
  828. res_append = torch.zeros(0, im_h, im_w)
  829. for i in range(masks.size(0)):
  830. mask_res = _onnx_paste_mask_in_image(masks[i][0], boxes[i], im_h, im_w)
  831. mask_res = mask_res.unsqueeze(0)
  832. res_append = torch.cat((res_append, mask_res))
  833. return res_append
  834. def paste_masks_in_image(masks, boxes, img_shape, padding=1):
  835. # type: (Tensor, Tensor, Tuple[int, int], int) -> Tensor
  836. masks, scale = expand_masks(masks, padding=padding)
  837. boxes = expand_boxes(boxes, scale).to(dtype=torch.int64)
  838. im_h, im_w = img_shape
  839. if torchvision._is_tracing():
  840. return _onnx_paste_masks_in_image_loop(
  841. masks, boxes, torch.scalar_tensor(im_h, dtype=torch.int64), torch.scalar_tensor(im_w, dtype=torch.int64)
  842. )[:, None]
  843. res = [paste_mask_in_image(m[0], b, im_h, im_w) for m, b in zip(masks, boxes)]
  844. if len(res) > 0:
  845. ret = torch.stack(res, dim=0)[:, None]
  846. else:
  847. ret = masks.new_empty((0, 1, im_h, im_w))
  848. return ret
  849. class RoIHeads(nn.Module):
  850. __annotations__ = {
  851. "box_coder": det_utils.BoxCoder,
  852. "proposal_matcher": det_utils.Matcher,
  853. "fg_bg_sampler": det_utils.BalancedPositiveNegativeSampler,
  854. }
  855. def __init__(
  856. self,
  857. box_roi_pool,
  858. box_head,
  859. box_predictor,
  860. # Faster R-CNN training
  861. fg_iou_thresh,
  862. bg_iou_thresh,
  863. batch_size_per_image,
  864. positive_fraction,
  865. bbox_reg_weights,
  866. # Faster R-CNN inference
  867. score_thresh,
  868. nms_thresh,
  869. detections_per_img,
  870. # Line
  871. line_roi_pool=None,
  872. line_head=None,
  873. line_predictor=None,
  874. # Mask
  875. mask_roi_pool=None,
  876. mask_head=None,
  877. mask_predictor=None,
  878. keypoint_roi_pool=None,
  879. keypoint_head=None,
  880. keypoint_predictor=None,
  881. ):
  882. super().__init__()
  883. self.box_similarity = box_ops.box_iou
  884. # assign ground-truth boxes for each proposal
  885. self.proposal_matcher = det_utils.Matcher(fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
  886. self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(batch_size_per_image, positive_fraction)
  887. if bbox_reg_weights is None:
  888. bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)
  889. self.box_coder = det_utils.BoxCoder(bbox_reg_weights)
  890. self.box_roi_pool = box_roi_pool
  891. self.box_head = box_head
  892. self.box_predictor = box_predictor
  893. self.score_thresh = score_thresh
  894. self.nms_thresh = nms_thresh
  895. self.detections_per_img = detections_per_img
  896. self.line_roi_pool = line_roi_pool
  897. self.line_head = line_head
  898. self.line_predictor = line_predictor
  899. self.mask_roi_pool = mask_roi_pool
  900. self.mask_head = mask_head
  901. self.mask_predictor = mask_predictor
  902. self.keypoint_roi_pool = keypoint_roi_pool
  903. self.keypoint_head = keypoint_head
  904. self.keypoint_predictor = keypoint_predictor
  905. self.channel_compress = nn.Sequential(
  906. nn.Conv2d(256, 8, kernel_size=1),
  907. nn.BatchNorm2d(8),
  908. nn.ReLU(inplace=True)
  909. )
  910. def has_mask(self):
  911. if self.mask_roi_pool is None:
  912. return False
  913. if self.mask_head is None:
  914. return False
  915. if self.mask_predictor is None:
  916. return False
  917. return True
  918. def has_keypoint(self):
  919. if self.keypoint_roi_pool is None:
  920. return False
  921. if self.keypoint_head is None:
  922. return False
  923. if self.keypoint_predictor is None:
  924. return False
  925. return True
  926. def has_line(self):
  927. # if self.line_roi_pool is None:
  928. # return False
  929. if self.line_head is None:
  930. return False
  931. # if self.line_predictor is None:
  932. # return False
  933. return True
  934. def assign_targets_to_proposals(self, proposals, gt_boxes, gt_labels):
  935. # type: (List[Tensor], List[Tensor], List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
  936. matched_idxs = []
  937. labels = []
  938. for proposals_in_image, gt_boxes_in_image, gt_labels_in_image in zip(proposals, gt_boxes, gt_labels):
  939. if gt_boxes_in_image.numel() == 0:
  940. # Background image
  941. device = proposals_in_image.device
  942. clamped_matched_idxs_in_image = torch.zeros(
  943. (proposals_in_image.shape[0],), dtype=torch.int64, device=device
  944. )
  945. labels_in_image = torch.zeros((proposals_in_image.shape[0],), dtype=torch.int64, device=device)
  946. else:
  947. # set to self.box_similarity when https://github.com/pytorch/pytorch/issues/27495 lands
  948. match_quality_matrix = box_ops.box_iou(gt_boxes_in_image, proposals_in_image)
  949. matched_idxs_in_image = self.proposal_matcher(match_quality_matrix)
  950. clamped_matched_idxs_in_image = matched_idxs_in_image.clamp(min=0)
  951. labels_in_image = gt_labels_in_image[clamped_matched_idxs_in_image]
  952. labels_in_image = labels_in_image.to(dtype=torch.int64)
  953. # Label background (below the low threshold)
  954. bg_inds = matched_idxs_in_image == self.proposal_matcher.BELOW_LOW_THRESHOLD
  955. labels_in_image[bg_inds] = 0
  956. # Label ignore proposals (between low and high thresholds)
  957. ignore_inds = matched_idxs_in_image == self.proposal_matcher.BETWEEN_THRESHOLDS
  958. labels_in_image[ignore_inds] = -1 # -1 is ignored by sampler
  959. matched_idxs.append(clamped_matched_idxs_in_image)
  960. labels.append(labels_in_image)
  961. return matched_idxs, labels
  962. def subsample(self, labels):
  963. # type: (List[Tensor]) -> List[Tensor]
  964. sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
  965. sampled_inds = []
  966. for img_idx, (pos_inds_img, neg_inds_img) in enumerate(zip(sampled_pos_inds, sampled_neg_inds)):
  967. img_sampled_inds = torch.where(pos_inds_img | neg_inds_img)[0]
  968. sampled_inds.append(img_sampled_inds)
  969. return sampled_inds
  970. def add_gt_proposals(self, proposals, gt_boxes):
  971. # type: (List[Tensor], List[Tensor]) -> List[Tensor]
  972. proposals = [torch.cat((proposal, gt_box)) for proposal, gt_box in zip(proposals, gt_boxes)]
  973. return proposals
  974. def check_targets(self, targets):
  975. # type: (Optional[List[Dict[str, Tensor]]]) -> None
  976. if targets is None:
  977. raise ValueError("targets should not be None")
  978. if not all(["boxes" in t for t in targets]):
  979. raise ValueError("Every element of targets should have a boxes key")
  980. if not all(["labels" in t for t in targets]):
  981. raise ValueError("Every element of targets should have a labels key")
  982. if self.has_mask():
  983. if not all(["masks" in t for t in targets]):
  984. raise ValueError("Every element of targets should have a masks key")
  985. def select_training_samples(
  986. self,
  987. proposals, # type: List[Tensor]
  988. targets, # type: Optional[List[Dict[str, Tensor]]]
  989. ):
  990. # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
  991. self.check_targets(targets)
  992. if targets is None:
  993. raise ValueError("targets should not be None")
  994. dtype = proposals[0].dtype
  995. device = proposals[0].device
  996. gt_boxes = [t["boxes"].to(dtype) for t in targets]
  997. gt_labels = [t["labels"] for t in targets]
  998. # append ground-truth bboxes to propos
  999. proposals = self.add_gt_proposals(proposals, gt_boxes)
  1000. # get matching gt indices for each proposal
  1001. matched_idxs, labels = self.assign_targets_to_proposals(proposals, gt_boxes, gt_labels)
  1002. # sample a fixed proportion of positive-negative proposals
  1003. sampled_inds = self.subsample(labels)
  1004. matched_gt_boxes = []
  1005. num_images = len(proposals)
  1006. for img_id in range(num_images):
  1007. img_sampled_inds = sampled_inds[img_id]
  1008. proposals[img_id] = proposals[img_id][img_sampled_inds]
  1009. labels[img_id] = labels[img_id][img_sampled_inds]
  1010. matched_idxs[img_id] = matched_idxs[img_id][img_sampled_inds]
  1011. gt_boxes_in_image = gt_boxes[img_id]
  1012. if gt_boxes_in_image.numel() == 0:
  1013. gt_boxes_in_image = torch.zeros((1, 4), dtype=dtype, device=device)
  1014. matched_gt_boxes.append(gt_boxes_in_image[matched_idxs[img_id]])
  1015. regression_targets = self.box_coder.encode(matched_gt_boxes, proposals)
  1016. return proposals, matched_idxs, labels, regression_targets
  1017. def postprocess_detections(
  1018. self,
  1019. class_logits, # type: Tensor
  1020. box_regression, # type: Tensor
  1021. proposals, # type: List[Tensor]
  1022. image_shapes, # type: List[Tuple[int, int]]
  1023. ):
  1024. # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
  1025. device = class_logits.device
  1026. num_classes = class_logits.shape[-1]
  1027. boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
  1028. pred_boxes = self.box_coder.decode(box_regression, proposals)
  1029. pred_scores = F.softmax(class_logits, -1)
  1030. pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
  1031. pred_scores_list = pred_scores.split(boxes_per_image, 0)
  1032. all_boxes = []
  1033. all_scores = []
  1034. all_labels = []
  1035. for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
  1036. boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
  1037. # create labels for each prediction
  1038. labels = torch.arange(num_classes, device=device)
  1039. labels = labels.view(1, -1).expand_as(scores)
  1040. # remove predictions with the background label
  1041. boxes = boxes[:, 1:]
  1042. scores = scores[:, 1:]
  1043. labels = labels[:, 1:]
  1044. # batch everything, by making every class prediction be a separate instance
  1045. boxes = boxes.reshape(-1, 4)
  1046. scores = scores.reshape(-1)
  1047. labels = labels.reshape(-1)
  1048. # remove low scoring boxes
  1049. inds = torch.where(scores > self.score_thresh)[0]
  1050. boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
  1051. # remove empty boxes
  1052. keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
  1053. boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
  1054. # non-maximum suppression, independently done per class
  1055. keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
  1056. # keep only topk scoring predictions
  1057. keep = keep[: self.detections_per_img]
  1058. boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
  1059. all_boxes.append(boxes)
  1060. all_scores.append(scores)
  1061. all_labels.append(labels)
  1062. return all_boxes, all_scores, all_labels
  1063. def forward(
  1064. self,
  1065. features, # type: Dict[str, Tensor]
  1066. proposals, # type: List[Tensor]
  1067. image_shapes, # type: List[Tuple[int, int]]
  1068. targets=None, # type: Optional[List[Dict[str, Tensor]]]
  1069. ):
  1070. # type: (...) -> Tuple[List[Dict[str, Tensor]], Dict[str, Tensor]]
  1071. """
  1072. Args:
  1073. features (List[Tensor])
  1074. proposals (List[Tensor[N, 4]])
  1075. image_shapes (List[Tuple[H, W]])
  1076. targets (List[Dict])
  1077. """
  1078. print(f'roihead forward!!!')
  1079. if targets is not None:
  1080. for t in targets:
  1081. # TODO: https://github.com/pytorch/pytorch/issues/26731
  1082. floating_point_types = (torch.float, torch.double, torch.half)
  1083. if not t["boxes"].dtype in floating_point_types:
  1084. raise TypeError(f"target boxes must of float type, instead got {t['boxes'].dtype}")
  1085. if not t["labels"].dtype == torch.int64:
  1086. raise TypeError(f"target labels must of int64 type, instead got {t['labels'].dtype}")
  1087. if self.has_keypoint():
  1088. if not t["keypoints"].dtype == torch.float32:
  1089. raise TypeError(f"target keypoints must of float type, instead got {t['keypoints'].dtype}")
  1090. if self.training:
  1091. proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
  1092. else:
  1093. if targets is not None:
  1094. proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
  1095. else:
  1096. labels = None
  1097. regression_targets = None
  1098. matched_idxs = None
  1099. box_features = self.box_roi_pool(features, proposals, image_shapes)
  1100. box_features = self.box_head(box_features)
  1101. class_logits, box_regression = self.box_predictor(box_features)
  1102. result: List[Dict[str, torch.Tensor]] = []
  1103. losses = {}
  1104. # _, C, H, W = features['0'].shape # 忽略 batch_size,因为我们只关心 C, H, W
  1105. if self.training:
  1106. if labels is None:
  1107. raise ValueError("labels cannot be None")
  1108. if regression_targets is None:
  1109. raise ValueError("regression_targets cannot be None")
  1110. print(f'boxes compute losses')
  1111. loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
  1112. losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
  1113. else:
  1114. if targets is not None:
  1115. loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
  1116. losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
  1117. boxes, scores, labels = self.postprocess_detections(class_logits, box_regression, proposals,
  1118. image_shapes)
  1119. num_images = len(boxes)
  1120. for i in range(num_images):
  1121. result.append(
  1122. {
  1123. "boxes": boxes[i],
  1124. "labels": labels[i],
  1125. "scores": scores[i],
  1126. }
  1127. )
  1128. if self.has_line():
  1129. print(f'roi_heads forward has_line()!!!!')
  1130. # print(f'labels:{labels}')
  1131. line_proposals = [p["boxes"] for p in result]
  1132. point_proposals = [p["boxes"] for p in result]
  1133. print(f'boxes_proposals:{len(line_proposals)}')
  1134. # if line_proposals is None or len(line_proposals) == 0:
  1135. # # 返回空特征或者跳过该部分计算
  1136. # return torch.empty(0, C, H, W).to(features['0'].device)
  1137. if self.training:
  1138. # during training, only focus on positive boxes
  1139. num_images = len(proposals)
  1140. print(f'num_images:{num_images}')
  1141. line_proposals = []
  1142. point_proposals = []
  1143. arc_proposals = []
  1144. pos_matched_idxs = []
  1145. line_pos_matched_idxs = []
  1146. point_pos_matched_idxs = []
  1147. if matched_idxs is None:
  1148. raise ValueError("if in trainning, matched_idxs should not be None")
  1149. for img_id in range(num_images):
  1150. pos = torch.where(labels[img_id] > 0)[0]
  1151. line_pos=torch.where(labels[img_id] ==2)[0]
  1152. point_pos=torch.where(labels[img_id] ==1)[0]
  1153. line_proposals.append(proposals[img_id][line_pos])
  1154. point_proposals.append(proposals[img_id][point_pos])
  1155. line_pos_matched_idxs.append(matched_idxs[img_id][line_pos])
  1156. point_pos_matched_idxs.append(matched_idxs[img_id][point_pos])
  1157. # pos_matched_idxs.append(matched_idxs[img_id][pos])
  1158. else:
  1159. if targets is not None:
  1160. pos_matched_idxs = []
  1161. num_images = len(proposals)
  1162. line_proposals = []
  1163. point_proposals=[]
  1164. arc_proposals=[]
  1165. line_pos_matched_idxs = []
  1166. point_pos_matched_idxs = []
  1167. print(f'val num_images:{num_images}')
  1168. if matched_idxs is None:
  1169. raise ValueError("if in trainning, matched_idxs should not be None")
  1170. for img_id in range(num_images):
  1171. pos = torch.where(labels[img_id] > 0)[0]
  1172. # line_proposals.append(proposals[img_id][pos])
  1173. # pos_matched_idxs.append(matched_idxs[img_id][pos])
  1174. line_pos = torch.where(labels[img_id] == 2)[0]
  1175. point_pos = torch.where(labels[img_id] == 1)[0]
  1176. line_proposals.append(proposals[img_id][line_pos])
  1177. point_proposals.append(proposals[img_id][point_pos])
  1178. line_pos_matched_idxs.append(matched_idxs[img_id][line_pos])
  1179. point_pos_matched_idxs.append(matched_idxs[img_id][point_pos])
  1180. else:
  1181. pos_matched_idxs = None
  1182. print(f'line_proposals:{len(line_proposals)}')
  1183. # line_features = self.line_roi_pool(features, line_proposals, image_shapes)
  1184. # print(f'line_features from line_roi_pool:{line_features.shape}')
  1185. #(b,256,512,512)
  1186. # cs_features = self.channel_compress(features['0'])
  1187. #(b.8,512,512)
  1188. cs_features= features['0']
  1189. all_proposals=line_proposals+point_proposals
  1190. # print(f'point_proposals:{point_proposals}')
  1191. # print(f'all_proposals:{all_proposals}')
  1192. for p in point_proposals:
  1193. print(f'point_proposal:{p.shape}')
  1194. for ap in all_proposals:
  1195. print(f'ap_proposal:{ap.shape}')
  1196. filtered_proposals = [proposal for proposal in all_proposals if proposal.shape[0] > 0]
  1197. if len(filtered_proposals) > 0:
  1198. filtered_proposals_tensor=torch.cat(filtered_proposals)
  1199. print(f'filtered_proposals_tensor:{filtered_proposals_tensor.shape}')
  1200. line_proposals_tensor=torch.cat(line_proposals)
  1201. print(f'line_proposals_tensor:{line_proposals_tensor.shape}')
  1202. point_proposals_tensor=torch.cat(point_proposals)
  1203. print(f'point_proposals_tensor:{point_proposals_tensor.shape}')
  1204. # line_features = lines_features_align(line_features, filtered_proposals, image_shapes)
  1205. line_features=None
  1206. # line_features = features_align(cs_features, line_proposals, image_shapes)
  1207. # if line_features is not None:
  1208. # print(f'line_features:{line_features.shape}')
  1209. # if line_features is not None and point_features is not None:
  1210. # combine_features = torch.cat((point_features, line_features), dim=0)
  1211. # elif line_features is not None:
  1212. # combine_features =line_features
  1213. # elif point_features is not None:
  1214. # combine_features =point_features
  1215. # combine_features = point_features
  1216. # print(f'line_features from features_align:{combine_features.shape}')
  1217. # combine_features = self.line_head(cs_features)
  1218. # if point_features is not None:
  1219. # print(f'point_features:{point_features.shape}')
  1220. #(N,1,512,512)
  1221. # print(f'combine_features from line_head:{combine_features.shape}')
  1222. combine_features = self.line_predictor(cs_features )
  1223. print(f'combine_features from line_predictor:{combine_features.shape}')
  1224. point_features = features_align(combine_features, point_proposals, image_shapes)
  1225. print(f'point_features from features_align:{point_features.shape}')
  1226. combine_features=point_features
  1227. # line_logits = combine_features
  1228. # print(f'line_logits:{line_logits.shape}')
  1229. loss_line = {}
  1230. loss_line_iou = {}
  1231. loss_point = {}
  1232. if self.training:
  1233. if targets is None or pos_matched_idxs is None:
  1234. raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")
  1235. gt_lines = [t["lines"] for t in targets]
  1236. gt_points = [t["points"] for t in targets]
  1237. print(f'gt_lines:{gt_lines[0].shape}')
  1238. h, w = targets[0]["img_size"]
  1239. img_size = h
  1240. # rcnn_loss_line = lines_point_pair_loss(
  1241. # line_logits, line_proposals, gt_lines, pos_matched_idxs
  1242. # )
  1243. # iou_loss = line_iou_loss(line_logits, line_proposals, gt_lines, pos_matched_idxs, img_size)
  1244. gt_lines_tensor=torch.cat(gt_lines)
  1245. gt_points_tensor = torch.cat(gt_points)
  1246. print(f'gt_lines_tensor:{gt_lines_tensor.shape}')
  1247. print(f'gt_points_tensor:{gt_points_tensor.shape}')
  1248. if gt_lines_tensor.shape[0]>0 and line_features is not None:
  1249. loss_line = lines_point_pair_loss(
  1250. combine_features, line_proposals, gt_lines, line_pos_matched_idxs
  1251. )
  1252. loss_line_iou = line_iou_loss(combine_features, line_proposals, gt_lines, line_pos_matched_idxs, img_size)
  1253. if gt_points_tensor.shape[0]>0 and point_features is not None:
  1254. loss_point = compute_point_loss(
  1255. combine_features, point_proposals, gt_points, point_pos_matched_idxs
  1256. )
  1257. if not loss_line:
  1258. loss_line = torch.tensor(0.0, device=cs_features.device)
  1259. if not loss_line_iou:
  1260. loss_line_iou = torch.tensor(0.0, device=cs_features.device)
  1261. loss_line = {"loss_line": loss_line}
  1262. loss_line_iou = {'loss_line_iou': loss_line_iou}
  1263. loss_point = {"loss_point": loss_point}
  1264. else:
  1265. if targets is not None:
  1266. h, w = targets[0]["img_size"]
  1267. img_size = h
  1268. gt_lines = [t["lines"] for t in targets]
  1269. gt_points = [t["points"] for t in targets]
  1270. gt_lines_tensor = torch.cat(gt_lines)
  1271. gt_points_tensor = torch.cat(gt_points)
  1272. if gt_lines_tensor.shape[0] > 0 and line_features is not None:
  1273. loss_line = lines_point_pair_loss(
  1274. combine_features, line_proposals, gt_lines, line_pos_matched_idxs
  1275. )
  1276. loss_line_iou = line_iou_loss(combine_features, line_proposals, gt_lines, line_pos_matched_idxs,
  1277. img_size)
  1278. if gt_points_tensor.shape[0] > 0 and point_features is not None:
  1279. loss_point = compute_point_loss(
  1280. combine_features, point_proposals, gt_points, point_pos_matched_idxs
  1281. )
  1282. if not loss_line :
  1283. loss_line=torch.tensor(0.0,device=cs_features.device)
  1284. if not loss_line_iou :
  1285. loss_line_iou=torch.tensor(0.0,device=cs_features.device)
  1286. if not loss_point:
  1287. loss_point=torch.tensor(0.0,device=cs_features.device)
  1288. loss_line = {"loss_line": loss_line}
  1289. loss_line_iou = {'loss_line_iou': loss_line_iou}
  1290. loss_point={"loss_point":loss_point}
  1291. else:
  1292. if combine_features is None or line_proposals is None:
  1293. raise ValueError(
  1294. "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
  1295. )
  1296. # if line_features is not None:
  1297. # lines_probs, lines_scores = line_inference(combine_features,line_proposals)
  1298. # for keypoint_prob, kps, r in zip(lines_probs, lines_scores, result):
  1299. # r["lines"] = keypoint_prob
  1300. # r["liness_scores"] = kps
  1301. if point_features is not None:
  1302. point_probs, points_scores=point_inference(combine_features, point_proposals,)
  1303. for points, ps, r in zip(point_probs,points_scores, result):
  1304. print(f'points_prob :{points.shape}')
  1305. r["points"] = points
  1306. r["points_scores"] = ps
  1307. losses.update(loss_line)
  1308. losses.update(loss_line_iou)
  1309. losses.update(loss_point)
  1310. print(f'losses:{losses}')
  1311. if self.has_mask():
  1312. mask_proposals = [p["boxes"] for p in result]
  1313. if self.training:
  1314. if matched_idxs is None:
  1315. raise ValueError("if in training, matched_idxs should not be None")
  1316. # during training, only focus on positive boxes
  1317. num_images = len(proposals)
  1318. mask_proposals = []
  1319. pos_matched_idxs = []
  1320. for img_id in range(num_images):
  1321. pos = torch.where(labels[img_id] > 0)[0]
  1322. mask_proposals.append(proposals[img_id][pos])
  1323. pos_matched_idxs.append(matched_idxs[img_id][pos])
  1324. else:
  1325. pos_matched_idxs = None
  1326. if self.mask_roi_pool is not None:
  1327. mask_features = self.mask_roi_pool(features, mask_proposals, image_shapes)
  1328. mask_features = self.mask_head(mask_features)
  1329. mask_logits = self.mask_predictor(mask_features)
  1330. else:
  1331. raise Exception("Expected mask_roi_pool to be not None")
  1332. loss_mask = {}
  1333. if self.training:
  1334. if targets is None or pos_matched_idxs is None or mask_logits is None:
  1335. raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")
  1336. gt_masks = [t["masks"] for t in targets]
  1337. gt_labels = [t["labels"] for t in targets]
  1338. rcnn_loss_mask = maskrcnn_loss(mask_logits, mask_proposals, gt_masks, gt_labels, pos_matched_idxs)
  1339. loss_mask = {"loss_mask": rcnn_loss_mask}
  1340. else:
  1341. labels = [r["labels"] for r in result]
  1342. masks_probs = maskrcnn_inference(mask_logits, labels)
  1343. for mask_prob, r in zip(masks_probs, result):
  1344. r["masks"] = mask_prob
  1345. losses.update(loss_mask)
  1346. # keep none checks in if conditional so torchscript will conditionally
  1347. # compile each branch
  1348. if self.has_keypoint():
  1349. keypoint_proposals = [p["boxes"] for p in result]
  1350. if self.training:
  1351. # during training, only focus on positive boxes
  1352. num_images = len(proposals)
  1353. keypoint_proposals = []
  1354. pos_matched_idxs = []
  1355. if matched_idxs is None:
  1356. raise ValueError("if in trainning, matched_idxs should not be None")
  1357. for img_id in range(num_images):
  1358. pos = torch.where(labels[img_id] > 0)[0]
  1359. keypoint_proposals.append(proposals[img_id][pos])
  1360. pos_matched_idxs.append(matched_idxs[img_id][pos])
  1361. else:
  1362. pos_matched_idxs = None
  1363. keypoint_features = self.line_roi_pool(features, keypoint_proposals, image_shapes)
  1364. keypoint_features = self.line_head(keypoint_features)
  1365. keypoint_logits = self.line_predictor(keypoint_features)
  1366. loss_keypoint = {}
  1367. if self.training:
  1368. if targets is None or pos_matched_idxs is None:
  1369. raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")
  1370. gt_keypoints = [t["keypoints"] for t in targets]
  1371. rcnn_loss_keypoint = keypointrcnn_loss(
  1372. keypoint_logits, keypoint_proposals, gt_keypoints, pos_matched_idxs
  1373. )
  1374. loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
  1375. else:
  1376. if keypoint_logits is None or keypoint_proposals is None:
  1377. raise ValueError(
  1378. "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
  1379. )
  1380. keypoints_probs, lines_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
  1381. for keypoint_prob, kps, r in zip(keypoints_probs, lines_scores, result):
  1382. r["keypoints"] = keypoint_prob
  1383. r["keypoints_scores"] = kps
  1384. losses.update(loss_keypoint)
  1385. return result, losses