line_detect.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. import os
  2. from typing import Any, Callable, List, Optional, Tuple, Union
  3. import torch
  4. from torch import nn
  5. from libs.vision_libs import ops
  6. from libs.vision_libs.models import MobileNet_V3_Large_Weights, mobilenet_v3_large, EfficientNet_V2_S_Weights, \
  7. efficientnet_v2_s, detection, EfficientNet_V2_L_Weights, efficientnet_v2_l, EfficientNet_V2_M_Weights, \
  8. efficientnet_v2_m
  9. from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
  10. from libs.vision_libs.models.detection.rpn import RPNHead, RegionProposalNetwork
  11. from libs.vision_libs.models.detection.ssdlite import _mobilenet_extractor
  12. from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
  13. from libs.vision_libs.ops import misc as misc_nn_ops, MultiScaleRoIAlign
  14. from libs.vision_libs.transforms._presets import ObjectDetection
  15. from libs.vision_libs.models._api import register_model, Weights, WeightsEnum
  16. from libs.vision_libs.models._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES, _COCO_CATEGORIES
  17. from libs.vision_libs.models._utils import _ovewrite_value_param, handle_legacy_interface
  18. from libs.vision_libs.models.resnet import resnet50, ResNet50_Weights, ResNet18_Weights, resnet18
  19. from libs.vision_libs.models.detection._utils import overwrite_eps
  20. from libs.vision_libs.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers, \
  21. BackboneWithFPN, resnet_fpn_backbone
  22. from libs.vision_libs.models.detection.faster_rcnn import FasterRCNN, TwoMLPHead, FastRCNNPredictor
  23. from .roi_heads import RoIHeads
  24. from .trainer import Trainer
  25. from ..base import backbone_factory
  26. from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator
  27. # from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator
  28. from ..base.base_detection_net import BaseDetectionNet
  29. import torch.nn.functional as F
  30. from ..base.high_reso_resnet import resnet50fpn, resnet18fpn
  31. __all__ = [
  32. "LineDetect",
  33. "LineDetect_ResNet50_FPN_Weights",
  34. "linedetect_resnet50_fpn",
  35. ]
  36. def _default_anchorgen():
  37. anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
  38. aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
  39. return AnchorGenerator(anchor_sizes, aspect_ratios)
  40. class LineDetect(BaseDetectionNet):
  41. def __init__(
  42. self,
  43. backbone,
  44. num_classes=None,
  45. # transform parameters
  46. min_size=512,
  47. max_size=1333,
  48. image_mean=None,
  49. image_std=None,
  50. # RPN parameters
  51. rpn_anchor_generator=None,
  52. rpn_head=None,
  53. rpn_pre_nms_top_n_train=2000,
  54. rpn_pre_nms_top_n_test=1000,
  55. rpn_post_nms_top_n_train=2000,
  56. rpn_post_nms_top_n_test=1000,
  57. rpn_nms_thresh=0.7,
  58. rpn_fg_iou_thresh=0.7,
  59. rpn_bg_iou_thresh=0.3,
  60. rpn_batch_size_per_image=256,
  61. rpn_positive_fraction=0.5,
  62. rpn_score_thresh=0.0,
  63. # Box parameters
  64. box_roi_pool=None,
  65. box_head=None,
  66. box_predictor=None,
  67. box_score_thresh=0.05,
  68. box_nms_thresh=0.5,
  69. box_detections_per_img=100,
  70. box_fg_iou_thresh=0.5,
  71. box_bg_iou_thresh=0.5,
  72. box_batch_size_per_image=512,
  73. box_positive_fraction=0.25,
  74. bbox_reg_weights=None,
  75. # keypoint parameters
  76. line_roi_pool=None,
  77. line_head=None,
  78. line_predictor=None,
  79. num_keypoints=None,
  80. **kwargs,
  81. ):
  82. out_channels = backbone.out_channels
  83. if rpn_anchor_generator is None:
  84. rpn_anchor_generator = _default_anchorgen()
  85. if rpn_head is None:
  86. rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
  87. rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
  88. rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
  89. rpn = RegionProposalNetwork(
  90. rpn_anchor_generator,
  91. rpn_head,
  92. rpn_fg_iou_thresh,
  93. rpn_bg_iou_thresh,
  94. rpn_batch_size_per_image,
  95. rpn_positive_fraction,
  96. rpn_pre_nms_top_n,
  97. rpn_post_nms_top_n,
  98. rpn_nms_thresh,
  99. score_thresh=rpn_score_thresh,
  100. )
  101. if box_roi_pool is None:
  102. box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2)
  103. if box_head is None:
  104. resolution = box_roi_pool.output_size[0]
  105. representation_size = 1024
  106. box_head = TwoMLPHead(out_channels * resolution**2, representation_size)
  107. if box_predictor is None:
  108. representation_size = 1024
  109. box_predictor = ObjectionPredictor(representation_size, num_classes)
  110. roi_heads = RoIHeads(
  111. # Box
  112. box_roi_pool,
  113. box_head,
  114. box_predictor,
  115. box_fg_iou_thresh,
  116. box_bg_iou_thresh,
  117. box_batch_size_per_image,
  118. box_positive_fraction,
  119. bbox_reg_weights,
  120. box_score_thresh,
  121. box_nms_thresh,
  122. box_detections_per_img,
  123. )
  124. if image_mean is None:
  125. image_mean = [0.485, 0.456, 0.406]
  126. if image_std is None:
  127. image_std = [0.229, 0.224, 0.225]
  128. transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std, **kwargs)
  129. super().__init__(backbone, rpn, roi_heads, transform)
  130. if not isinstance(line_roi_pool, (MultiScaleRoIAlign, type(None))):
  131. raise TypeError(
  132. "keypoint_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(keypoint_roi_pool)}"
  133. )
  134. if min_size is None:
  135. min_size = (640, 672, 704, 736, 768, 800)
  136. if num_keypoints is not None:
  137. if line_predictor is not None:
  138. raise ValueError("num_keypoints should be None when keypoint_predictor is specified")
  139. else:
  140. num_keypoints = 2
  141. if line_roi_pool is None:
  142. line_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=14, sampling_ratio=2)
  143. if line_head is None:
  144. keypoint_layers = tuple(512 for _ in range(8))
  145. line_head = LineHeads(out_channels, keypoint_layers)
  146. if line_predictor is None:
  147. keypoint_dim_reduced = 512 # == keypoint_layers[-1]
  148. line_predictor = LinePredictor(keypoint_dim_reduced)
  149. self.roi_heads.line_roi_pool = line_roi_pool
  150. self.roi_heads.line_head = line_head
  151. self.roi_heads.line_predictor = line_predictor
  152. def start_train(self, cfg):
  153. # cfg = read_yaml(cfg)
  154. self.trainer = Trainer()
  155. self.trainer.train_from_cfg(model=self, cfg=cfg)
  156. def load_weights(self, save_path, device='cuda'):
  157. if os.path.exists(save_path):
  158. checkpoint = torch.load(save_path, map_location=device)
  159. self.load_state_dict(checkpoint['model_state_dict'])
  160. # if optimizer is not None:
  161. # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  162. # epoch = checkpoint['epoch']
  163. # loss = checkpoint['loss']
  164. # print(f"Loaded best model from {save_path} at epoch {epoch} with loss {loss:.4f}")
  165. print(f"Loaded model from {save_path}")
  166. else:
  167. print(f"No saved model found at {save_path}")
  168. return self
  169. class TwoMLPHead(nn.Module):
  170. """
  171. Standard heads for FPN-based models
  172. Args:
  173. in_channels (int): number of input channels
  174. representation_size (int): size of the intermediate representation
  175. """
  176. def __init__(self, in_channels, representation_size):
  177. super().__init__()
  178. self.fc6 = nn.Linear(in_channels, representation_size)
  179. self.fc7 = nn.Linear(representation_size, representation_size)
  180. def forward(self, x):
  181. x = x.flatten(start_dim=1)
  182. x = F.relu(self.fc6(x))
  183. x = F.relu(self.fc7(x))
  184. return x
  185. class ObjectionConvFCHead(nn.Sequential):
  186. def __init__(
  187. self,
  188. input_size: Tuple[int, int, int],
  189. conv_layers: List[int],
  190. fc_layers: List[int],
  191. norm_layer: Optional[Callable[..., nn.Module]] = None,
  192. ):
  193. """
  194. Args:
  195. input_size (Tuple[int, int, int]): the input size in CHW format.
  196. conv_layers (list): feature dimensions of each Convolution layer
  197. fc_layers (list): feature dimensions of each FCN layer
  198. norm_layer (callable, optional): Module specifying the normalization layer to use. Default: None
  199. """
  200. in_channels, in_height, in_width = input_size
  201. blocks = []
  202. previous_channels = in_channels
  203. for current_channels in conv_layers:
  204. blocks.append(misc_nn_ops.Conv2dNormActivation(previous_channels, current_channels, norm_layer=norm_layer))
  205. previous_channels = current_channels
  206. blocks.append(nn.Flatten())
  207. previous_channels = previous_channels * in_height * in_width
  208. for current_channels in fc_layers:
  209. blocks.append(nn.Linear(previous_channels, current_channels))
  210. blocks.append(nn.ReLU(inplace=True))
  211. previous_channels = current_channels
  212. super().__init__(*blocks)
  213. for layer in self.modules():
  214. if isinstance(layer, nn.Conv2d):
  215. nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
  216. if layer.bias is not None:
  217. nn.init.zeros_(layer.bias)
  218. class ObjectionPredictor(nn.Module):
  219. """
  220. Standard classification + bounding box regression layers
  221. for Fast R-CNN.
  222. Args:
  223. in_channels (int): number of input channels
  224. num_classes (int): number of output classes (including background)
  225. """
  226. def __init__(self, in_channels, num_classes):
  227. super().__init__()
  228. self.cls_score = nn.Linear(in_channels, num_classes)
  229. self.bbox_pred = nn.Linear(in_channels, num_classes * 4)
  230. def forward(self, x):
  231. if x.dim() == 4:
  232. torch._assert(
  233. list(x.shape[2:]) == [1, 1],
  234. f"x has the wrong shape, expecting the last two dimensions to be [1,1] instead of {list(x.shape[2:])}",
  235. )
  236. x = x.flatten(start_dim=1)
  237. scores = self.cls_score(x)
  238. bbox_deltas = self.bbox_pred(x)
  239. return scores, bbox_deltas
  240. class LineHeads(nn.Sequential):
  241. def __init__(self, in_channels, layers):
  242. d = []
  243. next_feature = in_channels
  244. for out_channels in layers:
  245. d.append(nn.Conv2d(next_feature, out_channels, 3, stride=1, padding=1))
  246. d.append(nn.ReLU(inplace=True))
  247. next_feature = out_channels
  248. super().__init__(*d)
  249. for m in self.children():
  250. if isinstance(m, nn.Conv2d):
  251. nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
  252. nn.init.constant_(m.bias, 0)
  253. class LinePredictor(nn.Module):
  254. def __init__(self, in_channels, out_channels=1 ):
  255. super().__init__()
  256. input_features = in_channels
  257. deconv_kernel = 4
  258. self.kps_score_lowres = nn.ConvTranspose2d(
  259. input_features,
  260. out_channels,
  261. deconv_kernel,
  262. stride=2,
  263. padding=deconv_kernel // 2 - 1,
  264. )
  265. nn.init.kaiming_normal_(self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu")
  266. nn.init.constant_(self.kps_score_lowres.bias, 0)
  267. self.up_scale = 2
  268. self.out_channels = out_channels
  269. def forward(self, x):
  270. print(f'before kps_score_lowres x:{x.shape}')
  271. x = self.kps_score_lowres(x)
  272. print(f'kps_score_lowres x:{x.shape}')
  273. return torch.nn.functional.interpolate(
  274. x, scale_factor=float(self.up_scale), mode="bilinear", align_corners=False, recompute_scale_factor=False
  275. )
  276. def linedetect_newresnet18fpn(
  277. *,
  278. num_classes: Optional[int] = None,
  279. num_points:Optional[int] = None,
  280. **kwargs: Any,
  281. ) -> LineDetect:
  282. # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
  283. # weights_backbone = ResNet50_Weights.verify(weights_backbone)
  284. if num_classes is None:
  285. num_classes = 2
  286. if num_points is None:
  287. num_points = 2
  288. backbone =resnet18fpn()
  289. featmap_names=['0', '1', '2', '3','pool']
  290. # print(f'featmap_names:{featmap_names}')
  291. roi_pooler = MultiScaleRoIAlign(
  292. featmap_names=featmap_names,
  293. output_size=7,
  294. sampling_ratio=2
  295. )
  296. num_features=len(featmap_names)
  297. anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features)) # 自动生成不同大小
  298. # print(f'anchor_sizes:{anchor_sizes}')
  299. aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
  300. # print(f'aspect_ratios:{aspect_ratios}')
  301. anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
  302. model = LineDetect(backbone, num_classes, num_keypoints=num_points,rpn_anchor_generator=anchor_generator,box_roi_pool=roi_pooler, **kwargs)
  303. return model
  304. def linedetect_resnet18_fpn(
  305. *,
  306. num_classes: Optional[int] = None,
  307. num_points: Optional[int] = None,
  308. **kwargs: Any,
  309. ) -> LineDetect:
  310. if num_classes is None:
  311. num_classes = 2
  312. if num_points is None:
  313. num_points = 2
  314. backbone = resnet_fpn_backbone(backbone_name='resnet18',weights=None)
  315. model = LineDetect(backbone, num_classes, num_keypoints=num_points, **kwargs)
  316. return model
  317. def linedetect_resnet50_fpn(
  318. *,
  319. num_classes: Optional[int] = None,
  320. num_points: Optional[int] = None,
  321. **kwargs: Any,
  322. ) -> LineDetect:
  323. if num_classes is None:
  324. num_classes = 2
  325. if num_points is None:
  326. num_points = 2
  327. backbone = resnet_fpn_backbone(backbone_name='resnet18', weights=None)
  328. model = LineDetect(backbone, num_classes, num_keypoints=num_points, **kwargs)
  329. return model