line_detect.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705
  1. import os
  2. from typing import Any, Callable, List, Optional, Tuple
  3. import torch
  4. from torch import nn
  5. from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
  6. from libs.vision_libs.models.detection.rpn import RPNHead, RegionProposalNetwork
  7. from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
  8. from libs.vision_libs.ops import misc as misc_nn_ops, MultiScaleRoIAlign
  9. from libs.vision_libs.models.detection.backbone_utils import BackboneWithFPN, resnet_fpn_backbone
  10. from libs.vision_libs.models.detection.faster_rcnn import TwoMLPHead
  11. from models.line_detect.heads.arc.arc_heads import ArcHeads, ArcEquationHead
  12. from models.line_detect.heads.circle.circle_heads import CircleHeads, CirclePredictor
  13. from .heads.decoder import FPNDecoder
  14. from models.line_detect.heads.line.line_heads import LinePredictor
  15. from models.line_detect.heads.point.point_heads import PointHeads, PointPredictor
  16. from .heads.ins.ins_predictor import ArcEquationPredictor
  17. from .loi_heads import RoIHeads
  18. from .trainer import Trainer
  19. from ..base.backbone_factory import get_anchor_generator, MaxVitBackbone, \
  20. get_swin_transformer_fpn, get_efficientnetv2_fpn
  21. # from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator
  22. from ..base.base_detection_net import BaseDetectionNet
  23. import torch.nn.functional as F
  24. from ..base.high_reso_maxvit import maxvit_with_fpn
  25. from ..base.high_reso_resnet import resnet50fpn, resnet18fpn, resnet101fpn, Bottleneck
  26. __all__ = [
  27. "LineDetect",
  28. "linedetect_resnet50_fpn",
  29. ]
  30. from ..line_net.line_detect import LineHeads
  31. def _default_anchorgen():
  32. anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
  33. aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
  34. return AnchorGenerator(anchor_sizes, aspect_ratios)
  35. class LineDetect(BaseDetectionNet):
  36. def __init__(
  37. self,
  38. backbone,
  39. num_classes=3,
  40. # transform parameters
  41. min_size=512,
  42. max_size=512,
  43. image_mean=None,
  44. image_std=None,
  45. # RPN parameters
  46. rpn_anchor_generator=None,
  47. rpn_head=None,
  48. rpn_pre_nms_top_n_train=2000,
  49. rpn_pre_nms_top_n_test=1000,
  50. rpn_post_nms_top_n_train=2000,
  51. rpn_post_nms_top_n_test=1000,
  52. rpn_nms_thresh=0.7,
  53. rpn_fg_iou_thresh=0.7,
  54. rpn_bg_iou_thresh=0.3,
  55. rpn_batch_size_per_image=256,
  56. rpn_positive_fraction=0.5,
  57. rpn_score_thresh=0.0,
  58. # Box parameters
  59. box_roi_pool=None,
  60. box_head=None,
  61. box_predictor=None,
  62. box_score_thresh=0.05,
  63. box_nms_thresh=0.5,
  64. box_detections_per_img=200,
  65. box_fg_iou_thresh=0.7,
  66. box_bg_iou_thresh=0.3,
  67. box_batch_size_per_image=512,
  68. box_positive_fraction=0.25,
  69. bbox_reg_weights=None,
  70. # line parameters
  71. line_roi_pool=None,
  72. line_head=None,
  73. line_predictor=None,
  74. # point parameters
  75. point_roi_pool=None,
  76. point_head=None,
  77. point_predictor=None,
  78. ins_head=None,
  79. ins_predictor=None,
  80. circle_roi_pool=None,
  81. arc_equation_head=None,
  82. # arc parameters
  83. arc_roi_pool=None,
  84. arc_head=None,
  85. arc_predictor=None,
  86. num_points=4,
  87. detect_point=False,
  88. detect_line=False,
  89. detect_arc=True,
  90. detect_ins=False,
  91. **kwargs,
  92. ):
  93. out_channels = backbone.out_channels
  94. if rpn_anchor_generator is None:
  95. rpn_anchor_generator = _default_anchorgen()
  96. if rpn_head is None:
  97. rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
  98. rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
  99. rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
  100. rpn = RegionProposalNetwork(
  101. rpn_anchor_generator,
  102. rpn_head,
  103. rpn_fg_iou_thresh,
  104. rpn_bg_iou_thresh,
  105. rpn_batch_size_per_image,
  106. rpn_positive_fraction,
  107. rpn_pre_nms_top_n,
  108. rpn_post_nms_top_n,
  109. rpn_nms_thresh,
  110. score_thresh=rpn_score_thresh,
  111. )
  112. if box_roi_pool is None:
  113. box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2)
  114. if box_head is None:
  115. resolution = box_roi_pool.output_size[0]
  116. representation_size = 1024
  117. box_head = TwoMLPHead(out_channels * resolution**2, representation_size)
  118. if box_predictor is None:
  119. representation_size = 1024
  120. box_predictor = ObjectionPredictor(representation_size, num_classes)
  121. roi_heads = RoIHeads(
  122. # Box
  123. box_roi_pool,
  124. box_head,
  125. box_predictor,
  126. box_fg_iou_thresh,
  127. box_bg_iou_thresh,
  128. box_batch_size_per_image,
  129. box_positive_fraction,
  130. bbox_reg_weights,
  131. box_score_thresh,
  132. box_nms_thresh,
  133. box_detections_per_img,
  134. detect_point=detect_point,
  135. detect_line=detect_line,
  136. detect_arc=detect_arc,
  137. detect_ins=detect_ins,
  138. )
  139. if image_mean is None:
  140. image_mean = [0.485, 0.456, 0.406]
  141. if image_std is None:
  142. image_std = [0.229, 0.224, 0.225]
  143. transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std, **kwargs)
  144. super().__init__(backbone, rpn, roi_heads, transform)
  145. if line_head is None and detect_line:
  146. layers = tuple(num_points for _ in range(8))
  147. line_head = LineHeads(8, layers)
  148. if line_predictor is None and detect_line:
  149. # keypoint_dim_reduced = 512 # == keypoint_layers[-1]
  150. line_predictor = LinePredictor(in_channels=256)
  151. if point_head is None and detect_point:
  152. layers = tuple(num_points for _ in range(8))
  153. point_head = PointHeads(8, layers)
  154. if point_predictor is None and detect_point:
  155. # keypoint_dim_reduced = 512 # == keypoint_layers[-1]
  156. point_predictor = PointPredictor(in_channels=256)
  157. if detect_arc and arc_head is None:
  158. layers = tuple(num_points for _ in range(8))
  159. arc_head=ArcHeads(8,layers)
  160. if detect_arc and arc_predictor is None:
  161. layers = tuple(num_points for _ in range(8))
  162. # arc_predictor=ArcPredictor(in_channels=256,out_channels=1)
  163. arc_predictor=FPNDecoder(Bottleneck)
  164. if detect_ins and ins_head is None:
  165. layers = tuple(num_points for _ in range(8))
  166. ins_head = FPNDecoder(Bottleneck)
  167. if detect_ins and ins_predictor is None:
  168. # layers = tuple(num_points for _ in range(8))
  169. # arc_predictor=ArcPredictor(in_channels=256,out_channels=1)
  170. # circle_predictor = CirclePredictor(in_channels=256,out_channels=4)
  171. ins_predictor=ArcEquationPredictor()
  172. arc_equation_head = ArcEquationHead()
  173. self.roi_heads.line_roi_pool = line_roi_pool
  174. self.roi_heads.line_head = line_head
  175. self.roi_heads.line_predictor = line_predictor
  176. self.roi_heads.point_roi_pool = point_roi_pool
  177. self.roi_heads.point_head = point_head
  178. self.roi_heads.point_predictor = point_predictor
  179. self.roi_heads.arc_roi_pool = arc_roi_pool
  180. self.roi_heads.arc_head = arc_head
  181. self.roi_heads.arc_predictor = arc_predictor
  182. self.roi_heads.ins_roi_pool = circle_roi_pool
  183. self.roi_heads.ins_head = ins_head
  184. self.roi_heads.ins_predictor = ins_predictor
  185. self.roi_heads.arc_equation_head = arc_equation_head
  186. def start_train(self, cfg):
  187. # cfg = read_yaml(cfg)
  188. self.trainer = Trainer()
  189. self.trainer.train_from_cfg(model=self, cfg=cfg)
  190. def load_weights(self, save_path, device='cuda'):
  191. if os.path.exists(save_path):
  192. checkpoint = torch.load(save_path, map_location=device)
  193. self.load_state_dict(checkpoint['model_state_dict'])
  194. # if optimizer is not None:
  195. # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  196. # epoch = checkpoint['epoch']
  197. # loss = checkpoint['loss']
  198. # print(f"Loaded best model from {save_path} at epoch {epoch} with loss {loss:.4f}")
  199. print(f"Loaded model from {save_path}")
  200. else:
  201. print(f"No saved model found at {save_path}")
  202. return self
  203. class TwoMLPHead(nn.Module):
  204. """
  205. Standard heads for FPN-based models
  206. Args:
  207. in_channels (int): number of input channels
  208. representation_size (int): size of the intermediate representation
  209. """
  210. def __init__(self, in_channels, representation_size):
  211. super().__init__()
  212. self.fc6 = nn.Linear(in_channels, representation_size)
  213. self.fc7 = nn.Linear(representation_size, representation_size)
  214. def forward(self, x):
  215. x = x.flatten(start_dim=1)
  216. x = F.relu(self.fc6(x))
  217. x = F.relu(self.fc7(x))
  218. return x
  219. class ObjectionConvFCHead(nn.Sequential):
  220. def __init__(
  221. self,
  222. input_size: Tuple[int, int, int],
  223. conv_layers: List[int],
  224. fc_layers: List[int],
  225. norm_layer: Optional[Callable[..., nn.Module]] = None,
  226. ):
  227. """
  228. Args:
  229. input_size (Tuple[int, int, int]): the input size in CHW format.
  230. conv_layers (list): feature dimensions of each Convolution layer
  231. fc_layers (list): feature dimensions of each FCN layer
  232. norm_layer (callable, optional): Module specifying the normalization layer to use. Default: None
  233. """
  234. in_channels, in_height, in_width = input_size
  235. blocks = []
  236. previous_channels = in_channels
  237. for current_channels in conv_layers:
  238. blocks.append(misc_nn_ops.Conv2dNormActivation(previous_channels, current_channels, norm_layer=norm_layer))
  239. previous_channels = current_channels
  240. blocks.append(nn.Flatten())
  241. previous_channels = previous_channels * in_height * in_width
  242. for current_channels in fc_layers:
  243. blocks.append(nn.Linear(previous_channels, current_channels))
  244. blocks.append(nn.ReLU(inplace=True))
  245. previous_channels = current_channels
  246. super().__init__(*blocks)
  247. for layer in self.modules():
  248. if isinstance(layer, nn.Conv2d):
  249. nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
  250. if layer.bias is not None:
  251. nn.init.zeros_(layer.bias)
  252. class ObjectionPredictor(nn.Module):
  253. """
  254. Standard classification + bounding box regression layers
  255. for Fast R-CNN.
  256. Args:
  257. in_channels (int): number of input channels
  258. num_classes (int): number of output classes (including background)
  259. """
  260. def __init__(self, in_channels, num_classes):
  261. super().__init__()
  262. self.cls_score = nn.Linear(in_channels, num_classes)
  263. self.bbox_pred = nn.Linear(in_channels, num_classes * 4)
  264. def forward(self, x):
  265. if x.dim() == 4:
  266. torch._assert(
  267. list(x.shape[2:]) == [1, 1],
  268. f"x has the wrong shape, expecting the last two dimensions to be [1,1] instead of {list(x.shape[2:])}",
  269. )
  270. x = x.flatten(start_dim=1)
  271. scores = self.cls_score(x)
  272. bbox_deltas = self.bbox_pred(x)
  273. return scores, bbox_deltas
  274. def linedetect_newresnet18fpn(
  275. *,
  276. num_classes: Optional[int] = None,
  277. num_points:Optional[int] = None,
  278. **kwargs: Any,
  279. ) -> LineDetect:
  280. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  281. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  282. if num_classes is None:
  283. num_classes = 5
  284. if num_points is None:
  285. num_points = 4
  286. size=768
  287. backbone =resnet18fpn()
  288. featmap_names=['0', '1', '2', '3','4','pool']
  289. # print(f'featmap_names:{featmap_names}')
  290. roi_pooler = MultiScaleRoIAlign(
  291. featmap_names=featmap_names,
  292. output_size=7,
  293. sampling_ratio=2
  294. )
  295. num_features=len(featmap_names)
  296. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  297. # print(f'anchor_sizes:{anchor_sizes}')
  298. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  299. # print(f'aspect_ratios:{aspect_ratios}')
  300. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  301. model = LineDetect(backbone,
  302. num_classes, min_size=size, max_size=size, num_points=num_points,
  303. rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  304. detect_point=False,
  305. detect_line=False,
  306. detect_arc=False,
  307. detect_ins=True,
  308. **kwargs)
  309. return model
  310. def linedetect_newresnet50fpn(
  311. *,
  312. num_classes: Optional[int] = None,
  313. num_points:Optional[int] = None,
  314. **kwargs: Any,
  315. ) -> LineDetect:
  316. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  317. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  318. if num_classes is None:
  319. num_classes = 5
  320. if num_points is None:
  321. num_points = 4
  322. size=768
  323. backbone =resnet50fpn(out_channels=256)
  324. featmap_names=['0', '1', '2', '3','4','pool']
  325. # print(f'featmap_names:{featmap_names}')
  326. roi_pooler = MultiScaleRoIAlign(
  327. featmap_names=featmap_names,
  328. output_size=7,
  329. sampling_ratio=2
  330. )
  331. num_features=len(featmap_names)
  332. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  333. # print(f'anchor_sizes:{anchor_sizes}')
  334. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  335. # print(f'aspect_ratios:{aspect_ratios}')
  336. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  337. model = LineDetect(backbone, num_classes, min_size=size, max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  338. detect_point=False,
  339. detect_line=False,
  340. detect_arc=False,
  341. detect_ins=True,
  342. **kwargs)
  343. return model
  344. def linedetect_newresnet101fpn(
  345. *,
  346. num_classes: Optional[int] = None,
  347. num_points:Optional[int] = None,
  348. **kwargs: Any,
  349. ) -> LineDetect:
  350. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  351. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  352. if num_classes is None:
  353. num_classes = 5
  354. if num_points is None:
  355. num_points = 3
  356. size=768
  357. backbone =resnet101fpn(out_channels=256)
  358. featmap_names=['0', '1', '2', '3','4','pool']
  359. # print(f'featmap_names:{featmap_names}')
  360. roi_pooler = MultiScaleRoIAlign(
  361. featmap_names=featmap_names,
  362. output_size=7,
  363. sampling_ratio=2
  364. )
  365. num_features=len(featmap_names)
  366. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  367. # print(f'anchor_sizes:{anchor_sizes}')
  368. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  369. # print(f'aspect_ratios:{aspect_ratios}')
  370. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  371. model = LineDetect(backbone, num_classes, min_size=size, max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  372. detect_point=False,
  373. detect_line=False,
  374. detect_arc=False,
  375. detect_ins=True,
  376. **kwargs)
  377. return model
  378. def linedetect_newresnet152fpn(
  379. *,
  380. num_classes: Optional[int] = None,
  381. num_points:Optional[int] = None,
  382. **kwargs: Any,
  383. ) -> LineDetect:
  384. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  385. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  386. if num_classes is None:
  387. num_classes = 5
  388. if num_points is None:
  389. num_points = 3
  390. size=768
  391. backbone =resnet101fpn(out_channels=256)
  392. featmap_names=['0', '1', '2', '3','4','pool']
  393. # print(f'featmap_names:{featmap_names}')
  394. roi_pooler = MultiScaleRoIAlign(
  395. featmap_names=featmap_names,
  396. output_size=7,
  397. sampling_ratio=2
  398. )
  399. num_features=len(featmap_names)
  400. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  401. # print(f'anchor_sizes:{anchor_sizes}')
  402. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  403. # print(f'aspect_ratios:{aspect_ratios}')
  404. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  405. model = LineDetect(backbone, num_classes, min_size=size, max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
  406. detect_point=False,
  407. detect_line=False,
  408. detect_arc=False,
  409. detect_ins=True,
  410. **kwargs)
  411. return model
  412. def linedetect_efficientnet(
  413. *,
  414. num_classes: Optional[int] = None,
  415. num_points:Optional[int] = None,
  416. name: Optional[str] = 'efficientnet_v2_l',
  417. **kwargs: Any,
  418. ) -> LineDetect:
  419. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  420. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  421. if num_classes is None:
  422. num_classes = 7
  423. if num_points is None:
  424. num_points = 3
  425. size=224*3
  426. featmap_names = ['0', '1', '2', '3', '4', 'pool']
  427. roi_pooler = MultiScaleRoIAlign(
  428. featmap_names=featmap_names,
  429. output_size=7,
  430. sampling_ratio=2
  431. )
  432. backbone_with_fpn=get_efficientnetv2_fpn(name=name)
  433. test_input = torch.randn(1, 3,size,size)
  434. model = LineDetect(
  435. backbone=backbone_with_fpn,
  436. min_size=size,
  437. max_size=size,
  438. num_classes=num_classes, # COCO 数据集有 91 类
  439. rpn_anchor_generator=get_anchor_generator(backbone_with_fpn, test_input=test_input),
  440. box_roi_pool=roi_pooler,
  441. detect_line=True,
  442. detect_point=False,
  443. detect_arc=False,
  444. detect_ins=True,
  445. )
  446. return model
  447. def linedetect_maxvitfpn(
  448. *,
  449. num_classes: Optional[int] = None,
  450. num_points:Optional[int] = None,
  451. **kwargs: Any,
  452. ) -> LineDetect:
  453. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  454. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  455. if num_classes is None:
  456. num_classes = 5
  457. if num_points is None:
  458. num_points = 3
  459. size=224*3
  460. maxvit = MaxVitBackbone(input_size=(size,size))
  461. # print(maxvit.named_children())
  462. # for i,layer in enumerate(maxvit.named_children()):
  463. # print(f'layer:{i}:{layer}')
  464. in_channels_list = [64, 64, 128, 256, 512]
  465. featmap_names = ['0', '1', '2', '3', '4', 'pool']
  466. roi_pooler = MultiScaleRoIAlign(
  467. featmap_names=featmap_names,
  468. output_size=7,
  469. sampling_ratio=2
  470. )
  471. backbone_with_fpn = BackboneWithFPN(
  472. maxvit,
  473. return_layers={'stem': '0', 'block0': '1', 'block1': '2', 'block2': '3', 'block3': '4'},
  474. # 确保这些键对应到实际的层
  475. in_channels_list=in_channels_list,
  476. out_channels=256
  477. )
  478. test_input = torch.randn(1, 3,size,size)
  479. model = LineDetect(
  480. backbone=backbone_with_fpn,
  481. min_size=size,
  482. max_size=size,
  483. num_classes=num_classes, # COCO 数据集有 91 类
  484. rpn_anchor_generator=get_anchor_generator(backbone_with_fpn, test_input=test_input),
  485. box_roi_pool=roi_pooler,
  486. detect_line=False,
  487. detect_point=False,
  488. detect_arc=False,
  489. detect_ins=True,
  490. )
  491. return model
  492. def linedetect_high_maxvitfpn(
  493. *,
  494. num_classes: Optional[int] = None,
  495. num_points:Optional[int] = None,
  496. **kwargs: Any,
  497. ) -> LineDetect:
  498. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  499. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  500. if num_classes is None:
  501. num_classes = 5
  502. if num_points is None:
  503. num_points = 3
  504. size=224*2
  505. maxvitfpn =maxvit_with_fpn(size=size)
  506. # print(maxvit.named_children())
  507. # for i,layer in enumerate(maxvit.named_children()):
  508. # print(f'layer:{i}:{layer}')
  509. in_channels_list = [64,64, 64, 128, 256, 512]
  510. featmap_names = ['0', '1', '2', '3', '4', '5','pool']
  511. roi_pooler = MultiScaleRoIAlign(
  512. featmap_names=featmap_names,
  513. output_size=7,
  514. sampling_ratio=2
  515. )
  516. test_input = torch.randn(1, 3,size,size)
  517. model = LineDetect(
  518. backbone=maxvitfpn,
  519. num_classes=num_classes,
  520. min_size=size,
  521. max_size=size,
  522. rpn_anchor_generator=get_anchor_generator(maxvitfpn, test_input=test_input),
  523. box_roi_pool=roi_pooler
  524. )
  525. return model
  526. def linedetect_swin_transformer_fpn(
  527. *,
  528. num_classes: Optional[int] = None,
  529. num_points:Optional[int] = None,
  530. type='t',
  531. **kwargs: Any,
  532. ) -> LineDetect:
  533. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  534. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  535. if num_classes is None:
  536. num_classes = 3
  537. if num_points is None:
  538. num_points = 3
  539. size=512
  540. backbone_with_fpn, roi_pooler, anchor_generator=get_swin_transformer_fpn(type=type)
  541. # test_input = torch.randn(1, 3,size,size)
  542. model = LineDetect(
  543. backbone=backbone_with_fpn,
  544. min_size=size,
  545. max_size=size,
  546. num_classes=3, # COCO 数据集有 91 类
  547. rpn_anchor_generator=anchor_generator,
  548. box_roi_pool=roi_pooler,
  549. detect_line=False,
  550. detect_point=False,
  551. )
  552. return model
  553. def linedetect_resnet18_fpn(
  554. *,
  555. num_classes: Optional[int] = None,
  556. num_points: Optional[int] = None,
  557. **kwargs: Any,
  558. ) -> LineDetect:
  559. if num_classes is None:
  560. num_classes = 4
  561. if num_points is None:
  562. num_points = 3
  563. size=1024
  564. backbone = resnet_fpn_backbone(backbone_name='resnet18',weights=None)
  565. model = LineDetect(backbone,min_size=size,max_size=size , num_classes=num_classes, num_points=num_points, **kwargs)
  566. return model
  567. def linedetect_resnet50_fpn(
  568. *,
  569. num_classes: Optional[int] = None,
  570. num_points: Optional[int] = None,
  571. **kwargs: Any,
  572. ) -> LineDetect:
  573. if num_classes is None:
  574. num_classes = 3
  575. if num_points is None:
  576. num_points = 3
  577. backbone = resnet_fpn_backbone(backbone_name='resnet18', weights=None)
  578. model = LineDetect(backbone, num_classes, num_points=num_points, **kwargs)
  579. return model