line_detect.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. import os
  2. from typing import Any, Callable, List, Optional, Tuple, Union
  3. import torch
  4. from torch import nn
  5. from libs.vision_libs import ops
  6. from libs.vision_libs.models import MobileNet_V3_Large_Weights, mobilenet_v3_large, EfficientNet_V2_S_Weights, \
  7. efficientnet_v2_s, detection, EfficientNet_V2_L_Weights, efficientnet_v2_l, EfficientNet_V2_M_Weights, \
  8. efficientnet_v2_m
  9. from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
  10. from libs.vision_libs.models.detection.rpn import RPNHead, RegionProposalNetwork
  11. from libs.vision_libs.models.detection.ssdlite import _mobilenet_extractor
  12. from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
  13. from libs.vision_libs.ops import misc as misc_nn_ops, MultiScaleRoIAlign
  14. from libs.vision_libs.transforms._presets import ObjectDetection
  15. from libs.vision_libs.models._api import register_model, Weights, WeightsEnum
  16. from libs.vision_libs.models._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES, _COCO_CATEGORIES
  17. from libs.vision_libs.models._utils import _ovewrite_value_param, handle_legacy_interface
  18. from libs.vision_libs.models.resnet import resnet50, ResNet50_Weights, ResNet18_Weights, resnet18
  19. from libs.vision_libs.models.detection._utils import overwrite_eps
  20. from libs.vision_libs.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers, \
  21. BackboneWithFPN, resnet_fpn_backbone
  22. from libs.vision_libs.models.detection.faster_rcnn import FasterRCNN, TwoMLPHead, FastRCNNPredictor
  23. from .heads.line_heads import LinePredictor
  24. from .loi_heads import RoIHeads
  25. from .trainer import Trainer
  26. from ..base import backbone_factory
  27. from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator, get_maxvit_fpn, MaxVitBackbone, \
  28. get_swin_transformer_fpn
  29. # from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator
  30. from ..base.base_detection_net import BaseDetectionNet
  31. import torch.nn.functional as F
  32. from ..base.high_reso_maxvit import maxvit_with_fpn
  33. from ..base.high_reso_resnet import resnet50fpn, resnet18fpn, resnet101fpn
  34. __all__ = [
  35. "LineDetect",
  36. "linedetect_resnet50_fpn",
  37. ]
  38. from ..line_net.line_detect import LineHeads
  39. def _default_anchorgen():
  40. anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
  41. aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
  42. return AnchorGenerator(anchor_sizes, aspect_ratios)
  43. class LineDetect(BaseDetectionNet):
  44. def __init__(
  45. self,
  46. backbone,
  47. num_classes=3,
  48. # transform parameters
  49. min_size=512,
  50. max_size=512,
  51. image_mean=None,
  52. image_std=None,
  53. # RPN parameters
  54. rpn_anchor_generator=None,
  55. rpn_head=None,
  56. rpn_pre_nms_top_n_train=2000,
  57. rpn_pre_nms_top_n_test=1000,
  58. rpn_post_nms_top_n_train=2000,
  59. rpn_post_nms_top_n_test=1000,
  60. rpn_nms_thresh=0.7,
  61. rpn_fg_iou_thresh=0.7,
  62. rpn_bg_iou_thresh=0.3,
  63. rpn_batch_size_per_image=256,
  64. rpn_positive_fraction=0.5,
  65. rpn_score_thresh=0.0,
  66. # Box parameters
  67. box_roi_pool=None,
  68. box_head=None,
  69. box_predictor=None,
  70. box_score_thresh=0.05,
  71. box_nms_thresh=0.5,
  72. box_detections_per_img=200,
  73. box_fg_iou_thresh=0.5,
  74. box_bg_iou_thresh=0.5,
  75. box_batch_size_per_image=512,
  76. box_positive_fraction=0.25,
  77. bbox_reg_weights=None,
  78. # keypoint parameters
  79. line_roi_pool=None,
  80. line_head=None,
  81. line_predictor=None,
  82. num_points=3,
  83. **kwargs,
  84. ):
  85. out_channels = backbone.out_channels
  86. if rpn_anchor_generator is None:
  87. rpn_anchor_generator = _default_anchorgen()
  88. if rpn_head is None:
  89. rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
  90. rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
  91. rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
  92. rpn = RegionProposalNetwork(
  93. rpn_anchor_generator,
  94. rpn_head,
  95. rpn_fg_iou_thresh,
  96. rpn_bg_iou_thresh,
  97. rpn_batch_size_per_image,
  98. rpn_positive_fraction,
  99. rpn_pre_nms_top_n,
  100. rpn_post_nms_top_n,
  101. rpn_nms_thresh,
  102. score_thresh=rpn_score_thresh,
  103. )
  104. if box_roi_pool is None:
  105. box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2)
  106. if box_head is None:
  107. resolution = box_roi_pool.output_size[0]
  108. representation_size = 1024
  109. box_head = TwoMLPHead(out_channels * resolution**2, representation_size)
  110. if box_predictor is None:
  111. representation_size = 1024
  112. box_predictor = ObjectionPredictor(representation_size, num_classes)
  113. roi_heads = RoIHeads(
  114. # Box
  115. box_roi_pool,
  116. box_head,
  117. box_predictor,
  118. box_fg_iou_thresh,
  119. box_bg_iou_thresh,
  120. box_batch_size_per_image,
  121. box_positive_fraction,
  122. bbox_reg_weights,
  123. box_score_thresh,
  124. box_nms_thresh,
  125. box_detections_per_img,
  126. )
  127. if image_mean is None:
  128. image_mean = [0.485, 0.456, 0.406]
  129. if image_std is None:
  130. image_std = [0.229, 0.224, 0.225]
  131. transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std, **kwargs)
  132. super().__init__(backbone, rpn, roi_heads, transform)
  133. if line_head is None:
  134. keypoint_layers = tuple(num_points for _ in range(8))
  135. line_head = LineHeads(8, keypoint_layers)
  136. if line_predictor is None:
  137. # keypoint_dim_reduced = 512 # == keypoint_layers[-1]
  138. line_predictor = LinePredictor(in_channels=128)
  139. self.roi_heads.line_roi_pool = line_roi_pool
  140. self.roi_heads.line_head = line_head
  141. self.roi_heads.line_predictor = line_predictor
  142. def start_train(self, cfg):
  143. # cfg = read_yaml(cfg)
  144. self.trainer = Trainer()
  145. self.trainer.train_from_cfg(model=self, cfg=cfg)
  146. def load_weights(self, save_path, device='cuda'):
  147. if os.path.exists(save_path):
  148. checkpoint = torch.load(save_path, map_location=device)
  149. self.load_state_dict(checkpoint['model_state_dict'])
  150. # if optimizer is not None:
  151. # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  152. # epoch = checkpoint['epoch']
  153. # loss = checkpoint['loss']
  154. # print(f"Loaded best model from {save_path} at epoch {epoch} with loss {loss:.4f}")
  155. print(f"Loaded model from {save_path}")
  156. else:
  157. print(f"No saved model found at {save_path}")
  158. return self
  159. class TwoMLPHead(nn.Module):
  160. """
  161. Standard heads for FPN-based models
  162. Args:
  163. in_channels (int): number of input channels
  164. representation_size (int): size of the intermediate representation
  165. """
  166. def __init__(self, in_channels, representation_size):
  167. super().__init__()
  168. self.fc6 = nn.Linear(in_channels, representation_size)
  169. self.fc7 = nn.Linear(representation_size, representation_size)
  170. def forward(self, x):
  171. x = x.flatten(start_dim=1)
  172. x = F.relu(self.fc6(x))
  173. x = F.relu(self.fc7(x))
  174. return x
  175. class ObjectionConvFCHead(nn.Sequential):
  176. def __init__(
  177. self,
  178. input_size: Tuple[int, int, int],
  179. conv_layers: List[int],
  180. fc_layers: List[int],
  181. norm_layer: Optional[Callable[..., nn.Module]] = None,
  182. ):
  183. """
  184. Args:
  185. input_size (Tuple[int, int, int]): the input size in CHW format.
  186. conv_layers (list): feature dimensions of each Convolution layer
  187. fc_layers (list): feature dimensions of each FCN layer
  188. norm_layer (callable, optional): Module specifying the normalization layer to use. Default: None
  189. """
  190. in_channels, in_height, in_width = input_size
  191. blocks = []
  192. previous_channels = in_channels
  193. for current_channels in conv_layers:
  194. blocks.append(misc_nn_ops.Conv2dNormActivation(previous_channels, current_channels, norm_layer=norm_layer))
  195. previous_channels = current_channels
  196. blocks.append(nn.Flatten())
  197. previous_channels = previous_channels * in_height * in_width
  198. for current_channels in fc_layers:
  199. blocks.append(nn.Linear(previous_channels, current_channels))
  200. blocks.append(nn.ReLU(inplace=True))
  201. previous_channels = current_channels
  202. super().__init__(*blocks)
  203. for layer in self.modules():
  204. if isinstance(layer, nn.Conv2d):
  205. nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
  206. if layer.bias is not None:
  207. nn.init.zeros_(layer.bias)
  208. class ObjectionPredictor(nn.Module):
  209. """
  210. Standard classification + bounding box regression layers
  211. for Fast R-CNN.
  212. Args:
  213. in_channels (int): number of input channels
  214. num_classes (int): number of output classes (including background)
  215. """
  216. def __init__(self, in_channels, num_classes):
  217. super().__init__()
  218. self.cls_score = nn.Linear(in_channels, num_classes)
  219. self.bbox_pred = nn.Linear(in_channels, num_classes * 4)
  220. def forward(self, x):
  221. if x.dim() == 4:
  222. torch._assert(
  223. list(x.shape[2:]) == [1, 1],
  224. f"x has the wrong shape, expecting the last two dimensions to be [1,1] instead of {list(x.shape[2:])}",
  225. )
  226. x = x.flatten(start_dim=1)
  227. scores = self.cls_score(x)
  228. bbox_deltas = self.bbox_pred(x)
  229. return scores, bbox_deltas
  230. def linedetect_newresnet18fpn(
  231. *,
  232. num_classes: Optional[int] = None,
  233. num_points:Optional[int] = None,
  234. **kwargs: Any,
  235. ) -> LineDetect:
  236. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  237. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  238. if num_classes is None:
  239. num_classes = 3
  240. if num_points is None:
  241. num_points = 3
  242. backbone =resnet18fpn()
  243. featmap_names=['0', '1', '2', '3','4','pool']
  244. # print(f'featmap_names:{featmap_names}')
  245. roi_pooler = MultiScaleRoIAlign(
  246. featmap_names=featmap_names,
  247. output_size=7,
  248. sampling_ratio=2
  249. )
  250. num_features=len(featmap_names)
  251. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  252. # print(f'anchor_sizes:{anchor_sizes}')
  253. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  254. # print(f'aspect_ratios:{aspect_ratios}')
  255. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  256. model = LineDetect(backbone, num_classes, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler, **kwargs)
  257. return model
  258. def linedetect_newresnet50fpn(
  259. *,
  260. num_classes: Optional[int] = None,
  261. num_points:Optional[int] = None,
  262. **kwargs: Any,
  263. ) -> LineDetect:
  264. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  265. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  266. if num_classes is None:
  267. num_classes = 3
  268. if num_points is None:
  269. num_points = 3
  270. backbone =resnet50fpn()
  271. featmap_names=['0', '1', '2', '3','4','pool']
  272. # print(f'featmap_names:{featmap_names}')
  273. roi_pooler = MultiScaleRoIAlign(
  274. featmap_names=featmap_names,
  275. output_size=7,
  276. sampling_ratio=2
  277. )
  278. num_features=len(featmap_names)
  279. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  280. # print(f'anchor_sizes:{anchor_sizes}')
  281. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  282. # print(f'aspect_ratios:{aspect_ratios}')
  283. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  284. model = LineDetect(backbone, num_classes, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler, **kwargs)
  285. return model
  286. def linedetect_newresnet101fpn(
  287. *,
  288. num_classes: Optional[int] = None,
  289. num_points:Optional[int] = None,
  290. **kwargs: Any,
  291. ) -> LineDetect:
  292. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  293. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  294. if num_classes is None:
  295. num_classes = 3
  296. if num_points is None:
  297. num_points = 3
  298. backbone =resnet101fpn()
  299. featmap_names=['0', '1', '2', '3','4','pool']
  300. # print(f'featmap_names:{featmap_names}')
  301. roi_pooler = MultiScaleRoIAlign(
  302. featmap_names=featmap_names,
  303. output_size=7,
  304. sampling_ratio=2
  305. )
  306. num_features=len(featmap_names)
  307. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  308. # print(f'anchor_sizes:{anchor_sizes}')
  309. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  310. # print(f'aspect_ratios:{aspect_ratios}')
  311. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  312. model = LineDetect(backbone, num_classes, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler, **kwargs)
  313. return model
  314. def linedetect_maxvitfpn(
  315. *,
  316. num_classes: Optional[int] = None,
  317. num_points:Optional[int] = None,
  318. **kwargs: Any,
  319. ) -> LineDetect:
  320. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  321. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  322. if num_classes is None:
  323. num_classes = 3
  324. if num_points is None:
  325. num_points = 3
  326. size=224*2
  327. maxvit = MaxVitBackbone(input_size=(size,size))
  328. # print(maxvit.named_children())
  329. # for i,layer in enumerate(maxvit.named_children()):
  330. # print(f'layer:{i}:{layer}')
  331. in_channels_list = [64, 64, 128, 256, 512]
  332. featmap_names = ['0', '1', '2', '3', '4', 'pool']
  333. roi_pooler = MultiScaleRoIAlign(
  334. featmap_names=featmap_names,
  335. output_size=7,
  336. sampling_ratio=2
  337. )
  338. backbone_with_fpn = BackboneWithFPN(
  339. maxvit,
  340. return_layers={'stem': '0', 'block0': '1', 'block1': '2', 'block2': '3', 'block3': '4'},
  341. # 确保这些键对应到实际的层
  342. in_channels_list=in_channels_list,
  343. out_channels=128
  344. )
  345. test_input = torch.randn(1, 3,size,size)
  346. model = LineDetect(
  347. backbone=backbone_with_fpn,
  348. min_size=size,
  349. max_size=size,
  350. num_classes=3, # COCO 数据集有 91 类
  351. rpn_anchor_generator=get_anchor_generator(backbone_with_fpn, test_input=test_input),
  352. box_roi_pool=roi_pooler
  353. )
  354. return model
  355. def linedetect_high_maxvitfpn(
  356. *,
  357. num_classes: Optional[int] = None,
  358. num_points:Optional[int] = None,
  359. **kwargs: Any,
  360. ) -> LineDetect:
  361. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  362. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  363. if num_classes is None:
  364. num_classes = 3
  365. if num_points is None:
  366. num_points = 3
  367. size=224*2
  368. maxvitfpn =maxvit_with_fpn(size=size)
  369. # print(maxvit.named_children())
  370. # for i,layer in enumerate(maxvit.named_children()):
  371. # print(f'layer:{i}:{layer}')
  372. in_channels_list = [64,64, 64, 128, 256, 512]
  373. featmap_names = ['0', '1', '2', '3', '4', '5','pool']
  374. roi_pooler = MultiScaleRoIAlign(
  375. featmap_names=featmap_names,
  376. output_size=7,
  377. sampling_ratio=2
  378. )
  379. test_input = torch.randn(1, 3,size,size)
  380. model = LineDetect(
  381. backbone=maxvitfpn,
  382. min_size=size,
  383. max_size=size,
  384. num_classes=3, # COCO 数据集有 91 类
  385. rpn_anchor_generator=get_anchor_generator(maxvitfpn, test_input=test_input),
  386. box_roi_pool=roi_pooler
  387. )
  388. return model
  389. def linedetect_swin_transformer_fpn(
  390. *,
  391. num_classes: Optional[int] = None,
  392. num_points:Optional[int] = None,
  393. type='t',
  394. **kwargs: Any,
  395. ) -> LineDetect:
  396. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  397. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  398. if num_classes is None:
  399. num_classes = 3
  400. if num_points is None:
  401. num_points = 3
  402. size=512
  403. backbone_with_fpn, roi_pooler, anchor_generator=get_swin_transformer_fpn(type=type)
  404. # test_input = torch.randn(1, 3,size,size)
  405. model = LineDetect(
  406. backbone=backbone_with_fpn,
  407. min_size=size,
  408. max_size=size,
  409. num_classes=3, # COCO 数据集有 91 类
  410. rpn_anchor_generator=anchor_generator,
  411. box_roi_pool=roi_pooler
  412. )
  413. return model
  414. def linedetect_resnet18_fpn(
  415. *,
  416. num_classes: Optional[int] = None,
  417. num_points: Optional[int] = None,
  418. **kwargs: Any,
  419. ) -> LineDetect:
  420. if num_classes is None:
  421. num_classes = 3
  422. if num_points is None:
  423. num_points = 3
  424. backbone = resnet_fpn_backbone(backbone_name='resnet18',weights=None)
  425. model = LineDetect(backbone, num_classes, num_points=num_points, **kwargs)
  426. return model
  427. def linedetect_resnet50_fpn(
  428. *,
  429. num_classes: Optional[int] = None,
  430. num_points: Optional[int] = None,
  431. **kwargs: Any,
  432. ) -> LineDetect:
  433. if num_classes is None:
  434. num_classes = 3
  435. if num_points is None:
  436. num_points = 3
  437. backbone = resnet_fpn_backbone(backbone_name='resnet18', weights=None)
  438. model = LineDetect(backbone, num_classes, num_points=num_points, **kwargs)
  439. return model