line_detect.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699
  1. import os
  2. from typing import Any, Callable, List, Optional, Tuple
  3. import torch
  4. from torch import nn
  5. from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
  6. from libs.vision_libs.models.detection.rpn import RPNHead, RegionProposalNetwork
  7. from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
  8. from libs.vision_libs.ops import misc as misc_nn_ops, MultiScaleRoIAlign
  9. from libs.vision_libs.models.detection.backbone_utils import BackboneWithFPN, resnet_fpn_backbone
  10. from libs.vision_libs.models.detection.faster_rcnn import TwoMLPHead
  11. from models.line_detect.heads.arc.arc_heads import ArcHeads
  12. from models.line_detect.heads.circle.circle_heads import CircleHeads, CirclePredictor
  13. from .heads.decoder import FPNDecoder
  14. from models.line_detect.heads.line.line_heads import LinePredictor
  15. from models.line_detect.heads.point.point_heads import PointHeads, PointPredictor
  16. from .loi_heads import RoIHeads
  17. from .trainer import Trainer
  18. from ..base.backbone_factory import get_anchor_generator, MaxVitBackbone, \
  19. get_swin_transformer_fpn, get_efficientnetv2_fpn
  20. # from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator
  21. from ..base.base_detection_net import BaseDetectionNet
  22. import torch.nn.functional as F
  23. from ..base.high_reso_maxvit import maxvit_with_fpn
  24. from ..base.high_reso_resnet import resnet50fpn, resnet18fpn, resnet101fpn, Bottleneck
# Public names exported by a star-import of this module. The other
# ``linedetect_*`` factories defined below are not listed here and have to be
# imported by name.
__all__ = [
    "LineDetect",
    "linedetect_resnet50_fpn",
]
  29. from ..line_net.line_detect import LineHeads
  30. def _default_anchorgen():
  31. anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
  32. aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
  33. return AnchorGenerator(anchor_sizes, aspect_ratios)
  34. class LineDetect(BaseDetectionNet):
  35. def __init__(
  36. self,
  37. backbone,
  38. num_classes=3,
  39. # transform parameters
  40. min_size=512,
  41. max_size=512,
  42. image_mean=None,
  43. image_std=None,
  44. # RPN parameters
  45. rpn_anchor_generator=None,
  46. rpn_head=None,
  47. rpn_pre_nms_top_n_train=2000,
  48. rpn_pre_nms_top_n_test=1000,
  49. rpn_post_nms_top_n_train=2000,
  50. rpn_post_nms_top_n_test=1000,
  51. rpn_nms_thresh=0.7,
  52. rpn_fg_iou_thresh=0.7,
  53. rpn_bg_iou_thresh=0.3,
  54. rpn_batch_size_per_image=256,
  55. rpn_positive_fraction=0.5,
  56. rpn_score_thresh=0.0,
  57. # Box parameters
  58. box_roi_pool=None,
  59. box_head=None,
  60. box_predictor=None,
  61. box_score_thresh=0.05,
  62. box_nms_thresh=0.5,
  63. box_detections_per_img=200,
  64. box_fg_iou_thresh=0.7,
  65. box_bg_iou_thresh=0.3,
  66. box_batch_size_per_image=512,
  67. box_positive_fraction=0.25,
  68. bbox_reg_weights=None,
  69. # line parameters
  70. line_roi_pool=None,
  71. line_head=None,
  72. line_predictor=None,
  73. # point parameters
  74. point_roi_pool=None,
  75. point_head=None,
  76. point_predictor=None,
  77. circle_head=None,
  78. circle_predictor=None,
  79. circle_roi_pool=None,
  80. # arc parameters
  81. arc_roi_pool=None,
  82. arc_head=None,
  83. arc_predictor=None,
  84. num_points=4,
  85. detect_point=False,
  86. detect_line=False,
  87. detect_arc=True,
  88. detect_circle=False,
  89. **kwargs,
  90. ):
  91. out_channels = backbone.out_channels
  92. if rpn_anchor_generator is None:
  93. rpn_anchor_generator = _default_anchorgen()
  94. if rpn_head is None:
  95. rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
  96. rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
  97. rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
  98. rpn = RegionProposalNetwork(
  99. rpn_anchor_generator,
  100. rpn_head,
  101. rpn_fg_iou_thresh,
  102. rpn_bg_iou_thresh,
  103. rpn_batch_size_per_image,
  104. rpn_positive_fraction,
  105. rpn_pre_nms_top_n,
  106. rpn_post_nms_top_n,
  107. rpn_nms_thresh,
  108. score_thresh=rpn_score_thresh,
  109. )
  110. if box_roi_pool is None:
  111. box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2)
  112. if box_head is None:
  113. resolution = box_roi_pool.output_size[0]
  114. representation_size = 1024
  115. box_head = TwoMLPHead(out_channels * resolution**2, representation_size)
  116. if box_predictor is None:
  117. representation_size = 1024
  118. box_predictor = ObjectionPredictor(representation_size, num_classes)
  119. roi_heads = RoIHeads(
  120. # Box
  121. box_roi_pool,
  122. box_head,
  123. box_predictor,
  124. box_fg_iou_thresh,
  125. box_bg_iou_thresh,
  126. box_batch_size_per_image,
  127. box_positive_fraction,
  128. bbox_reg_weights,
  129. box_score_thresh,
  130. box_nms_thresh,
  131. box_detections_per_img,
  132. detect_point=detect_point,
  133. detect_line=detect_line,
  134. detect_arc=detect_arc,
  135. detect_circle=detect_circle,
  136. )
  137. if image_mean is None:
  138. image_mean = [0.485, 0.456, 0.406]
  139. if image_std is None:
  140. image_std = [0.229, 0.224, 0.225]
  141. transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std, **kwargs)
  142. super().__init__(backbone, rpn, roi_heads, transform)
  143. if line_head is None and detect_line:
  144. layers = tuple(num_points for _ in range(8))
  145. line_head = LineHeads(8, layers)
  146. if line_predictor is None and detect_line:
  147. # keypoint_dim_reduced = 512 # == keypoint_layers[-1]
  148. line_predictor = LinePredictor(in_channels=256)
  149. if point_head is None and detect_point:
  150. layers = tuple(num_points for _ in range(8))
  151. point_head = PointHeads(8, layers)
  152. if point_predictor is None and detect_point:
  153. # keypoint_dim_reduced = 512 # == keypoint_layers[-1]
  154. point_predictor = PointPredictor(in_channels=256)
  155. if detect_arc and arc_head is None:
  156. layers = tuple(num_points for _ in range(8))
  157. arc_head=ArcHeads(8,layers)
  158. if detect_arc and arc_predictor is None:
  159. layers = tuple(num_points for _ in range(8))
  160. # arc_predictor=ArcPredictor(in_channels=256,out_channels=1)
  161. arc_predictor=FPNDecoder(Bottleneck)
  162. if detect_circle and circle_head is None:
  163. layers = tuple(num_points for _ in range(8))
  164. circle_head = CircleHeads(8, layers)
  165. if detect_circle and circle_predictor is None:
  166. layers = tuple(num_points for _ in range(8))
  167. # arc_predictor=ArcPredictor(in_channels=256,out_channels=1)
  168. # circle_predictor = CirclePredictor(in_channels=256,out_channels=4)
  169. circle_predictor=FPNDecoder(Bottleneck)
  170. self.roi_heads.line_roi_pool = line_roi_pool
  171. self.roi_heads.line_head = line_head
  172. self.roi_heads.line_predictor = line_predictor
  173. self.roi_heads.point_roi_pool = point_roi_pool
  174. self.roi_heads.point_head = point_head
  175. self.roi_heads.point_predictor = point_predictor
  176. self.roi_heads.arc_roi_pool = arc_roi_pool
  177. self.roi_heads.arc_head = arc_head
  178. self.roi_heads.arc_predictor = arc_predictor
  179. self.roi_heads.circle_roi_pool = circle_roi_pool
  180. self.roi_heads.circle_head = circle_head
  181. self.roi_heads.circle_predictor = circle_predictor
  182. def start_train(self, cfg):
  183. # cfg = read_yaml(cfg)
  184. self.trainer = Trainer()
  185. self.trainer.train_from_cfg(model=self, cfg=cfg)
  186. def load_weights(self, save_path, device='cuda'):
  187. if os.path.exists(save_path):
  188. checkpoint = torch.load(save_path, map_location=device)
  189. self.load_state_dict(checkpoint['model_state_dict'])
  190. # if optimizer is not None:
  191. # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  192. # epoch = checkpoint['epoch']
  193. # loss = checkpoint['loss']
  194. # print(f"Loaded best model from {save_path} at epoch {epoch} with loss {loss:.4f}")
  195. print(f"Loaded model from {save_path}")
  196. else:
  197. print(f"No saved model found at {save_path}")
  198. return self
  199. class TwoMLPHead(nn.Module):
  200. """
  201. Standard heads for FPN-based models
  202. Args:
  203. in_channels (int): number of input channels
  204. representation_size (int): size of the intermediate representation
  205. """
  206. def __init__(self, in_channels, representation_size):
  207. super().__init__()
  208. self.fc6 = nn.Linear(in_channels, representation_size)
  209. self.fc7 = nn.Linear(representation_size, representation_size)
  210. def forward(self, x):
  211. x = x.flatten(start_dim=1)
  212. x = F.relu(self.fc6(x))
  213. x = F.relu(self.fc7(x))
  214. return x
  215. class ObjectionConvFCHead(nn.Sequential):
  216. def __init__(
  217. self,
  218. input_size: Tuple[int, int, int],
  219. conv_layers: List[int],
  220. fc_layers: List[int],
  221. norm_layer: Optional[Callable[..., nn.Module]] = None,
  222. ):
  223. """
  224. Args:
  225. input_size (Tuple[int, int, int]): the input size in CHW format.
  226. conv_layers (list): feature dimensions of each Convolution layer
  227. fc_layers (list): feature dimensions of each FCN layer
  228. norm_layer (callable, optional): Module specifying the normalization layer to use. Default: None
  229. """
  230. in_channels, in_height, in_width = input_size
  231. blocks = []
  232. previous_channels = in_channels
  233. for current_channels in conv_layers:
  234. blocks.append(misc_nn_ops.Conv2dNormActivation(previous_channels, current_channels, norm_layer=norm_layer))
  235. previous_channels = current_channels
  236. blocks.append(nn.Flatten())
  237. previous_channels = previous_channels * in_height * in_width
  238. for current_channels in fc_layers:
  239. blocks.append(nn.Linear(previous_channels, current_channels))
  240. blocks.append(nn.ReLU(inplace=True))
  241. previous_channels = current_channels
  242. super().__init__(*blocks)
  243. for layer in self.modules():
  244. if isinstance(layer, nn.Conv2d):
  245. nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
  246. if layer.bias is not None:
  247. nn.init.zeros_(layer.bias)
  248. class ObjectionPredictor(nn.Module):
  249. """
  250. Standard classification + bounding box regression layers
  251. for Fast R-CNN.
  252. Args:
  253. in_channels (int): number of input channels
  254. num_classes (int): number of output classes (including background)
  255. """
  256. def __init__(self, in_channels, num_classes):
  257. super().__init__()
  258. self.cls_score = nn.Linear(in_channels, num_classes)
  259. self.bbox_pred = nn.Linear(in_channels, num_classes * 4)
  260. def forward(self, x):
  261. if x.dim() == 4:
  262. torch._assert(
  263. list(x.shape[2:]) == [1, 1],
  264. f"x has the wrong shape, expecting the last two dimensions to be [1,1] instead of {list(x.shape[2:])}",
  265. )
  266. x = x.flatten(start_dim=1)
  267. scores = self.cls_score(x)
  268. bbox_deltas = self.bbox_pred(x)
  269. return scores, bbox_deltas
  270. def linedetect_newresnet18fpn(
  271. *,
  272. num_classes: Optional[int] = None,
  273. num_points:Optional[int] = None,
  274. **kwargs: Any,
  275. ) -> LineDetect:
  276. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  277. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  278. if num_classes is None:
  279. num_classes = 5
  280. if num_points is None:
  281. num_points = 4
  282. size=768
  283. backbone =resnet18fpn()
  284. featmap_names=['0', '1', '2', '3','4','pool']
  285. # print(f'featmap_names:{featmap_names}')
  286. roi_pooler = MultiScaleRoIAlign(
  287. featmap_names=featmap_names,
  288. output_size=7,
  289. sampling_ratio=2
  290. )
  291. num_features=len(featmap_names)
  292. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  293. # print(f'anchor_sizes:{anchor_sizes}')
  294. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  295. # print(f'aspect_ratios:{aspect_ratios}')
  296. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  297. model = LineDetect(backbone,
  298. num_classes,min_size=size,max_size=size, num_points=num_points,
  299. rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  300. detect_point=False,
  301. detect_line=False,
  302. detect_arc=False,
  303. detect_circle=True,
  304. **kwargs)
  305. return model
  306. def linedetect_newresnet50fpn(
  307. *,
  308. num_classes: Optional[int] = None,
  309. num_points:Optional[int] = None,
  310. **kwargs: Any,
  311. ) -> LineDetect:
  312. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  313. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  314. if num_classes is None:
  315. num_classes = 5
  316. if num_points is None:
  317. num_points = 4
  318. size=768
  319. backbone =resnet50fpn(out_channels=256)
  320. featmap_names=['0', '1', '2', '3','4','pool']
  321. # print(f'featmap_names:{featmap_names}')
  322. roi_pooler = MultiScaleRoIAlign(
  323. featmap_names=featmap_names,
  324. output_size=7,
  325. sampling_ratio=2
  326. )
  327. num_features=len(featmap_names)
  328. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  329. # print(f'anchor_sizes:{anchor_sizes}')
  330. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  331. # print(f'aspect_ratios:{aspect_ratios}')
  332. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  333. model = LineDetect(backbone, num_classes,min_size=size,max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  334. detect_point=False,
  335. detect_line=False,
  336. detect_arc=False,
  337. detect_circle=True,
  338. **kwargs)
  339. return model
  340. def linedetect_newresnet101fpn(
  341. *,
  342. num_classes: Optional[int] = None,
  343. num_points:Optional[int] = None,
  344. **kwargs: Any,
  345. ) -> LineDetect:
  346. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  347. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  348. if num_classes is None:
  349. num_classes = 5
  350. if num_points is None:
  351. num_points = 3
  352. size=768
  353. backbone =resnet101fpn(out_channels=256)
  354. featmap_names=['0', '1', '2', '3','4','pool']
  355. # print(f'featmap_names:{featmap_names}')
  356. roi_pooler = MultiScaleRoIAlign(
  357. featmap_names=featmap_names,
  358. output_size=7,
  359. sampling_ratio=2
  360. )
  361. num_features=len(featmap_names)
  362. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  363. # print(f'anchor_sizes:{anchor_sizes}')
  364. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  365. # print(f'aspect_ratios:{aspect_ratios}')
  366. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  367. model = LineDetect(backbone, num_classes,min_size=size,max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  368. detect_point=False,
  369. detect_line=False,
  370. detect_arc=False,
  371. detect_circle=True,
  372. **kwargs)
  373. return model
  374. def linedetect_newresnet152fpn(
  375. *,
  376. num_classes: Optional[int] = None,
  377. num_points:Optional[int] = None,
  378. **kwargs: Any,
  379. ) -> LineDetect:
  380. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  381. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  382. if num_classes is None:
  383. num_classes = 5
  384. if num_points is None:
  385. num_points = 3
  386. size=768
  387. backbone =resnet101fpn(out_channels=256)
  388. featmap_names=['0', '1', '2', '3','4','pool']
  389. # print(f'featmap_names:{featmap_names}')
  390. roi_pooler = MultiScaleRoIAlign(
  391. featmap_names=featmap_names,
  392. output_size=7,
  393. sampling_ratio=2
  394. )
  395. num_features=len(featmap_names)
  396. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  397. # print(f'anchor_sizes:{anchor_sizes}')
  398. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  399. # print(f'aspect_ratios:{aspect_ratios}')
  400. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  401. model = LineDetect(backbone, num_classes,min_size=size,max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  402. detect_point=False,
  403. detect_line=False,
  404. detect_arc=False,
  405. detect_circle=True,
  406. **kwargs)
  407. return model
  408. def linedetect_efficientnet(
  409. *,
  410. num_classes: Optional[int] = None,
  411. num_points:Optional[int] = None,
  412. name: Optional[str] = 'efficientnet_v2_l',
  413. **kwargs: Any,
  414. ) -> LineDetect:
  415. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  416. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  417. if num_classes is None:
  418. num_classes = 5
  419. if num_points is None:
  420. num_points = 3
  421. size=224*3
  422. featmap_names = ['0', '1', '2', '3', '4', 'pool']
  423. roi_pooler = MultiScaleRoIAlign(
  424. featmap_names=featmap_names,
  425. output_size=7,
  426. sampling_ratio=2
  427. )
  428. backbone_with_fpn=get_efficientnetv2_fpn(name=name)
  429. test_input = torch.randn(1, 3,size,size)
  430. model = LineDetect(
  431. backbone=backbone_with_fpn,
  432. min_size=size,
  433. max_size=size,
  434. num_classes=num_classes, # COCO 数据集有 91 类
  435. rpn_anchor_generator=get_anchor_generator(backbone_with_fpn, test_input=test_input),
  436. box_roi_pool=roi_pooler,
  437. detect_line=False,
  438. detect_point=False,
  439. detect_arc=False,
  440. detect_circle=True,
  441. )
  442. return model
  443. def linedetect_maxvitfpn(
  444. *,
  445. num_classes: Optional[int] = None,
  446. num_points:Optional[int] = None,
  447. **kwargs: Any,
  448. ) -> LineDetect:
  449. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  450. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  451. if num_classes is None:
  452. num_classes = 5
  453. if num_points is None:
  454. num_points = 3
  455. size=224*3
  456. maxvit = MaxVitBackbone(input_size=(size,size))
  457. # print(maxvit.named_children())
  458. # for i,layer in enumerate(maxvit.named_children()):
  459. # print(f'layer:{i}:{layer}')
  460. in_channels_list = [64, 64, 128, 256, 512]
  461. featmap_names = ['0', '1', '2', '3', '4', 'pool']
  462. roi_pooler = MultiScaleRoIAlign(
  463. featmap_names=featmap_names,
  464. output_size=7,
  465. sampling_ratio=2
  466. )
  467. backbone_with_fpn = BackboneWithFPN(
  468. maxvit,
  469. return_layers={'stem': '0', 'block0': '1', 'block1': '2', 'block2': '3', 'block3': '4'},
  470. # 确保这些键对应到实际的层
  471. in_channels_list=in_channels_list,
  472. out_channels=256
  473. )
  474. test_input = torch.randn(1, 3,size,size)
  475. model = LineDetect(
  476. backbone=backbone_with_fpn,
  477. min_size=size,
  478. max_size=size,
  479. num_classes=num_classes, # COCO 数据集有 91 类
  480. rpn_anchor_generator=get_anchor_generator(backbone_with_fpn, test_input=test_input),
  481. box_roi_pool=roi_pooler,
  482. detect_line=False,
  483. detect_point=False,
  484. detect_arc=False,
  485. detect_circle=True,
  486. )
  487. return model
  488. def linedetect_high_maxvitfpn(
  489. *,
  490. num_classes: Optional[int] = None,
  491. num_points:Optional[int] = None,
  492. **kwargs: Any,
  493. ) -> LineDetect:
  494. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  495. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  496. if num_classes is None:
  497. num_classes = 5
  498. if num_points is None:
  499. num_points = 3
  500. size=224*2
  501. maxvitfpn =maxvit_with_fpn(size=size)
  502. # print(maxvit.named_children())
  503. # for i,layer in enumerate(maxvit.named_children()):
  504. # print(f'layer:{i}:{layer}')
  505. in_channels_list = [64,64, 64, 128, 256, 512]
  506. featmap_names = ['0', '1', '2', '3', '4', '5','pool']
  507. roi_pooler = MultiScaleRoIAlign(
  508. featmap_names=featmap_names,
  509. output_size=7,
  510. sampling_ratio=2
  511. )
  512. test_input = torch.randn(1, 3,size,size)
  513. model = LineDetect(
  514. backbone=maxvitfpn,
  515. num_classes=num_classes,
  516. min_size=size,
  517. max_size=size,
  518. rpn_anchor_generator=get_anchor_generator(maxvitfpn, test_input=test_input),
  519. box_roi_pool=roi_pooler
  520. )
  521. return model
  522. def linedetect_swin_transformer_fpn(
  523. *,
  524. num_classes: Optional[int] = None,
  525. num_points:Optional[int] = None,
  526. type='t',
  527. **kwargs: Any,
  528. ) -> LineDetect:
  529. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  530. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  531. if num_classes is None:
  532. num_classes = 3
  533. if num_points is None:
  534. num_points = 3
  535. size=512
  536. backbone_with_fpn, roi_pooler, anchor_generator=get_swin_transformer_fpn(type=type)
  537. # test_input = torch.randn(1, 3,size,size)
  538. model = LineDetect(
  539. backbone=backbone_with_fpn,
  540. min_size=size,
  541. max_size=size,
  542. num_classes=3, # COCO 数据集有 91 类
  543. rpn_anchor_generator=anchor_generator,
  544. box_roi_pool=roi_pooler,
  545. detect_line=False,
  546. detect_point=False,
  547. )
  548. return model
  549. def linedetect_resnet18_fpn(
  550. *,
  551. num_classes: Optional[int] = None,
  552. num_points: Optional[int] = None,
  553. **kwargs: Any,
  554. ) -> LineDetect:
  555. if num_classes is None:
  556. num_classes = 4
  557. if num_points is None:
  558. num_points = 3
  559. size=1024
  560. backbone = resnet_fpn_backbone(backbone_name='resnet18',weights=None)
  561. model = LineDetect(backbone,min_size=size,max_size=size , num_classes=num_classes, num_points=num_points, **kwargs)
  562. return model
  563. def linedetect_resnet50_fpn(
  564. *,
  565. num_classes: Optional[int] = None,
  566. num_points: Optional[int] = None,
  567. **kwargs: Any,
  568. ) -> LineDetect:
  569. if num_classes is None:
  570. num_classes = 3
  571. if num_points is None:
  572. num_points = 3
  573. backbone = resnet_fpn_backbone(backbone_name='resnet18', weights=None)
  574. model = LineDetect(backbone, num_classes, num_points=num_points, **kwargs)
  575. return model