| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058 |
- import os
- from typing import Any, Callable, List, Optional, Tuple, Union
- import torch
- from torch import nn
- from torchvision.ops import MultiScaleRoIAlign
- from libs.vision_libs import ops
- from libs.vision_libs.models import MobileNet_V3_Large_Weights, mobilenet_v3_large, EfficientNet_V2_S_Weights, \
- efficientnet_v2_s, detection, EfficientNet_V2_L_Weights, efficientnet_v2_l, EfficientNet_V2_M_Weights, \
- efficientnet_v2_m
- from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
- from libs.vision_libs.models.detection.rpn import RPNHead, RegionProposalNetwork
- from libs.vision_libs.models.detection.ssdlite import _mobilenet_extractor
- from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
- from libs.vision_libs.ops import misc as misc_nn_ops
- from libs.vision_libs.transforms._presets import ObjectDetection
- from .line_head import LineRCNNHeads
- from .line_predictor import LineRCNNPredictor
- from libs.vision_libs.models._api import register_model, Weights, WeightsEnum
- from libs.vision_libs.models._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES, _COCO_CATEGORIES
- from libs.vision_libs.models._utils import _ovewrite_value_param, handle_legacy_interface
- from libs.vision_libs.models.resnet import resnet50, ResNet50_Weights, ResNet18_Weights, resnet18
- from libs.vision_libs.models.detection._utils import overwrite_eps
- from libs.vision_libs.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers, \
- BackboneWithFPN
- from libs.vision_libs.models.detection.faster_rcnn import FasterRCNN, TwoMLPHead, FastRCNNPredictor
- from .roi_heads import RoIHeads
- from .trainer import Trainer
- from ..base import backbone_factory
- from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator
- # from ..base.backbone_factory import get_convnext_fpn, get_anchor_generator
- from ..base.base_detection_net import BaseDetectionNet
- import torch.nn.functional as F
- from .predict import Predict1, Predict
- from ..base.high_reso_resnet import resnet50fpn, resnet18fpn
- from ..config.config_tool import read_yaml
# Feature dimension constant (not referenced in the portion of the module reviewed here).
FEATURE_DIM = 8

# Device used by ``LineNet.predict``: CUDA when it is available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Names exported by ``from <this module> import *``.
__all__ = [
    "LineNet",
    "LineNet_ResNet50_FPN_Weights",
    "LineNet_ResNet50_FPN_V2_Weights",
    "LineNet_MobileNet_V3_Large_FPN_Weights",
    "LineNet_MobileNet_V3_Large_320_FPN_Weights",
    "linenet_resnet50_fpn",
    "linenet_resnet50_fpn_v2",
    "linenet_mobilenet_v3_large_fpn",
    "linenet_mobilenet_v3_large_320_fpn",
]
def _default_anchorgen():
    """Return the default RPN anchor generator.

    Five levels are configured, each with a single anchor size
    (32, 64, 128, 256, 512) and the aspect ratios 0.5, 1.0 and 2.0.
    """
    sizes = tuple((side,) for side in (32, 64, 128, 256, 512))
    ratios = tuple((0.5, 1.0, 2.0) for _ in sizes)
    return AnchorGenerator(sizes, ratios)
class LineNet(BaseDetectionNet):
    """Two-stage detector with a box branch and an additional line branch.

    The constructor assembles a region proposal network, a ``RoIHeads``
    instance (box RoI pooling, box head, box predictor, line head and line
    predictor) and an input transform, and hands them together with the
    backbone to ``BaseDetectionNet``.

    NOTE: an earlier, config-driven constructor (``__init__(self, cfg)`` that
    read a YAML file and picked the backbone through ``backbone_factory``) was
    kept here as commented-out code; backbones are now built by the factory
    functions in this module instead.
    """

    def __init__(
        self,
        backbone,
        num_classes=None,
        # transform parameters
        min_size=512,
        max_size=1333,
        image_mean=None,
        image_std=None,
        # RPN parameters
        rpn_anchor_generator=None,
        rpn_head=None,
        rpn_pre_nms_top_n_train=2000,
        rpn_pre_nms_top_n_test=1000,
        rpn_post_nms_top_n_train=2000,
        rpn_post_nms_top_n_test=1000,
        rpn_nms_thresh=0.7,
        rpn_fg_iou_thresh=0.7,
        rpn_bg_iou_thresh=0.3,
        rpn_batch_size_per_image=256,
        rpn_positive_fraction=0.5,
        rpn_score_thresh=0.0,
        # Box parameters
        box_roi_pool=None,
        box_head=None,
        box_predictor=None,
        box_score_thresh=0.05,
        box_nms_thresh=0.5,
        box_detections_per_img=100,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        box_batch_size_per_image=512,
        box_positive_fraction=0.25,
        bbox_reg_weights=None,
        # line parameters
        line_head=None,
        line_predictor=None,
        **kwargs,
    ):
        """Build the detector from a backbone and optional custom components.

        Args:
            backbone: feature extractor; must expose an ``out_channels`` attribute.
            num_classes: number of box classes. Must be given when
                ``box_predictor`` is None and must be None when it is given.
            min_size, max_size, image_mean, image_std: forwarded to
                ``GeneralizedRCNNTransform``; mean/std default to
                ``[0.485, 0.456, 0.406]`` / ``[0.229, 0.224, 0.225]``.
            rpn_*: RPN components and hyper-parameters. Missing anchor
                generator / head are created with defaults.
            box_*, bbox_reg_weights: box branch components and hyper-parameters.
                Missing RoI pooler / head / predictor are created with defaults.
            line_head, line_predictor: line branch components; defaults are
                ``LineRCNNHeads(out_channels, 5)`` and ``LineRCNNPredictor()``.
            **kwargs: forwarded to ``GeneralizedRCNNTransform``.

        Raises:
            ValueError: if ``backbone`` has no ``out_channels`` or if
                ``num_classes`` / ``box_predictor`` are inconsistently specified.
            TypeError: if ``rpn_anchor_generator`` or ``box_roi_pool`` has the
                wrong type.
        """
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        if not isinstance(rpn_anchor_generator, (AnchorGenerator, type(None))):
            raise TypeError(
                f"rpn_anchor_generator should be of type AnchorGenerator or None instead of {type(rpn_anchor_generator)}"
            )
        if not isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))):
            raise TypeError(
                f"box_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(box_roi_pool)}"
            )

        # Exactly one of num_classes / box_predictor must be supplied.
        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError("num_classes should be None when box_predictor is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should not be None when box_predictor is not specified")

        out_channels = backbone.out_channels

        if line_head is None:
            # The default line head is always built with 5 classes,
            # independently of ``num_classes``.
            num_class = 5
            line_head = LineRCNNHeads(out_channels, num_class)

        if line_predictor is None:
            line_predictor = LineRCNNPredictor()

        if rpn_anchor_generator is None:
            rpn_anchor_generator = _default_anchorgen()
        if rpn_head is None:
            rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(
            rpn_anchor_generator,
            rpn_head,
            rpn_fg_iou_thresh,
            rpn_bg_iou_thresh,
            rpn_batch_size_per_image,
            rpn_positive_fraction,
            rpn_pre_nms_top_n,
            rpn_post_nms_top_n,
            rpn_nms_thresh,
            score_thresh=rpn_score_thresh,
        )

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=["0", "1", "2", "3", "4"], output_size=7, sampling_ratio=2
            )

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = BoxPredictor(representation_size, num_classes)

        roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            line_head,
            line_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
        )

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std, **kwargs)

        super().__init__(backbone, rpn, roi_heads, transform)

        self.roi_heads = roi_heads

    def start_train(self, cfg):
        """Create a ``Trainer``, keep it on ``self.trainer`` and train this model from ``cfg``."""
        self.trainer = Trainer()
        self.trainer.train_from_cfg(model=self, cfg=cfg)

    def load_best_model(self, save_path, device='cuda'):
        """Load ``checkpoint['model_state_dict']`` from ``save_path`` into this model.

        Args:
            save_path: path of the checkpoint file.
            device: ``map_location`` handed to ``torch.load``.

        Returns:
            ``self``. When the file does not exist a message is printed and the
            model is returned unchanged.
        """
        if os.path.exists(save_path):
            # NOTE(review): torch.load unpickles the file; only load checkpoints
            # from trusted sources.
            checkpoint = torch.load(save_path, map_location=device)
            self.load_state_dict(checkpoint['model_state_dict'])
            print(f"Loaded model from {save_path}")
        else:
            print(f"No saved model found at {save_path}")
        return self

    # Original note: "load weights and run inference together". The current
    # body does not load weights itself; call ``load_best_model`` first.
    def predict(self, img_path, type=0, threshold=0.5, save_path=None, show=False):
        """Switch to eval mode, move to the module-level ``device`` and run ``Predict``.

        The arguments are forwarded unchanged to ``Predict``; their meaning is
        defined there.
        """
        self.eval()
        self.to(device)
        # Keep the helper in a local variable. Assigning it to ``self.predict``
        # (as was done before) replaced this method on the instance, so a
        # second ``model.predict(...)`` call failed.
        runner = Predict(self, img_path, type, threshold, save_path, show)
        runner.run()

    # Original note: "without loading weights".
    def predict1(self, model, img_path, type=0, threshold=0.5, save_path=None, show=False):
        """Run ``Predict1`` on the given ``model`` with the forwarded arguments."""
        # Local variable for the same reason as in ``predict``: do not shadow
        # the ``predict`` method on the instance.
        runner = Predict1(model, img_path, type, threshold, save_path, show)
        runner.run()
class TwoMLPHead(nn.Module):
    """Two fully connected layers with ReLU, used as the default box head.

    Args:
        in_channels (int): size of the flattened input features
        representation_size (int): width of both fully connected layers
    """

    def __init__(self, in_channels, representation_size):
        super().__init__()
        self.fc6 = nn.Linear(in_channels, representation_size)
        self.fc7 = nn.Linear(representation_size, representation_size)

    def forward(self, x):
        """Flatten everything after the batch dimension and apply fc6/fc7, each followed by ReLU."""
        feats = torch.flatten(x, start_dim=1)
        for layer in (self.fc6, self.fc7):
            feats = F.relu(layer(feats))
        return feats
class LineNetConvFCHead(nn.Sequential):
    """Sequential head: conv blocks, a flatten, then Linear+ReLU pairs."""

    def __init__(
        self,
        input_size: Tuple[int, int, int],
        conv_layers: List[int],
        fc_layers: List[int],
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ):
        """
        Args:
            input_size (Tuple[int, int, int]): input size in CHW order.
            conv_layers (list): output channels of each convolution block.
            fc_layers (list): output features of each fully connected layer.
            norm_layer (callable, optional): normalization layer factory passed
                to every convolution block. Default: None
        """
        channels, height, width = input_size

        # Channel chain: input channels followed by every conv block's output.
        conv_dims = [channels, *conv_layers]
        stages = [
            misc_nn_ops.Conv2dNormActivation(c_in, c_out, norm_layer=norm_layer)
            for c_in, c_out in zip(conv_dims, conv_dims[1:])
        ]

        stages.append(nn.Flatten())

        # Feature chain: flattened conv output followed by every FC width.
        fc_dims = [conv_dims[-1] * height * width, *fc_layers]
        for f_in, f_out in zip(fc_dims, fc_dims[1:]):
            stages += [nn.Linear(f_in, f_out), nn.ReLU(inplace=True)]

        super().__init__(*stages)

        # Kaiming-normal init for convolution weights, zeros for their biases.
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
            if module.bias is not None:
                nn.init.zeros_(module.bias)
class BoxPredictor(nn.Module):
    """Classification and box-regression output layers of the box branch.

    Args:
        in_channels (int): number of input features
        num_classes (int): number of output classes (including background)
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Linear(in_channels, num_classes)
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        """Return ``(scores, bbox_deltas)`` for the flattened input.

        A 4-D input must have trailing spatial dimensions of ``[1, 1]``.
        """
        if x.dim() == 4:
            spatial = list(x.shape[2:])
            torch._assert(
                spatial == [1, 1],
                f"x has the wrong shape, expecting the last two dimensions to be [1,1] instead of {spatial}",
            )
        flat = x.flatten(start_dim=1)
        return self.cls_score(flat), self.bbox_pred(flat)
# Metadata entries shared by every ``Weights`` definition in this module.
_COMMON_META = dict(
    categories=_COCO_CATEGORIES,
    min_size=(1, 1),
)
def create_efficientnetv2_backbone(name='efficientnet_v2_m', pretrained=True):
    """Wrap the feature stages of an EfficientNetV2 model in ``BackboneWithFPN``.

    Args:
        name: one of ``'efficientnet_v2_s'``, ``'efficientnet_v2_m'`` or
            ``'efficientnet_v2_l'``.
        pretrained: when true, the ``IMAGENET1K_V1`` weights of the chosen
            variant are loaded; otherwise no weights are loaded.

    Returns:
        A ``BackboneWithFPN`` with 256 output channels that taps feature
        stages 2, 3, 4 and 5 (exposed to the FPN as "0".."3").

    Raises:
        ValueError: if ``name`` is not a supported variant, or if the number of
            output channels of a tapped stage cannot be determined.
    """
    # Select the EfficientNetV2 variant and keep only its feature extractor.
    if name == 'efficientnet_v2_s':
        weights = EfficientNet_V2_S_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = efficientnet_v2_s(weights=weights).features
    elif name == 'efficientnet_v2_m':
        weights = EfficientNet_V2_M_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = efficientnet_v2_m(weights=weights).features
    elif name == 'efficientnet_v2_l':
        weights = EfficientNet_V2_L_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = efficientnet_v2_l(weights=weights).features
    else:
        # Previously an unknown name fell through and crashed later with an
        # UnboundLocalError on ``backbone``.
        raise ValueError(
            f"Unsupported EfficientNetV2 variant: {name!r}; expected one of "
            "'efficientnet_v2_s', 'efficientnet_v2_m', 'efficientnet_v2_l'"
        )

    # Indices of the feature stages fed to the FPN; the FPN sees them as "0".."3".
    stage_indices = (2, 3, 4, 5)
    return_layers = {str(stage): str(level) for level, stage in enumerate(stage_indices)}

    # Collect the output channel count of every tapped stage.
    in_channels_list = []
    for layer_idx in stage_indices:
        module = backbone[layer_idx]
        if hasattr(module, 'out_channels'):
            in_channels_list.append(module.out_channels)
        elif hasattr(module[-1], 'out_channels'):
            # The stage itself has no out_channels; use its last sub-module.
            in_channels_list.append(module[-1].out_channels)
        else:
            raise ValueError(f"Cannot determine out_channels for layer {layer_idx}")

    # Wrap the feature extractor with an FPN.
    backbone_with_fpn = BackboneWithFPN(
        backbone=backbone,
        return_layers=return_layers,
        in_channels_list=in_channels_list,
        out_channels=256
    )
    return backbone_with_fpn
def get_line_net_efficientnetv2(num_classes, pretrained_backbone=True):
    """Build a ``LineNet`` on top of the default EfficientNetV2 + FPN backbone.

    Args:
        num_classes: number of box classes passed to ``LineNet``.
        pretrained_backbone: forwarded as ``pretrained`` to
            ``create_efficientnetv2_backbone``.

    Returns:
        A ``LineNet`` with a five-level anchor generator (sizes 16, 32, 64,
        128, 256; aspect ratios 0.5, 1.0, 2.0) and a ``MultiScaleRoIAlign``
        over the feature maps ``'0', '1', '2', '3', 'pool'``.
    """
    # Build the EfficientNetV2 backbone.
    backbone = create_efficientnetv2_backbone(pretrained=pretrained_backbone)

    # Probe the backbone once to report its feature map names. The probe runs
    # in eval mode: in training mode BatchNorm layers update their running
    # statistics even under ``no_grad``, which would pollute the (possibly
    # pretrained) statistics with random noise.
    was_training = backbone.training
    backbone.eval()
    try:
        with torch.no_grad():
            images = torch.rand(1, 3, 600, 800)
            features = backbone(images)
    finally:
        backbone.train(was_training)
    featmap_names = list(features.keys())
    print("Feature map names:", featmap_names)  # e.g. ['0', '1', '2', '3']

    # The level count and names are fixed here rather than taken from the probe.
    # NOTE(review): presumably the FPN appends a 'pool' level — confirm against
    # BackboneWithFPN's extra blocks.
    num_levels = 5
    featmap_names = ['0', '1', '2', '3', 'pool']

    # One anchor size per level, doubling from 16.
    anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_levels))
    print(f'anchor_sizes:{anchor_sizes}')

    # Every level shares the same aspect ratios.
    aspect_ratios = ((0.5, 1.0, 2.0),) * num_levels
    print(f'aspect_ratios:{aspect_ratios}')

    anchor_generator = AnchorGenerator(
        sizes=anchor_sizes,
        aspect_ratios=aspect_ratios
    )

    # RoI pooling over the FPN levels.
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=featmap_names,
        output_size=7,
        sampling_ratio=2
    )

    # Assemble the model.
    model = LineNet(
        backbone=backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler
    )
    return model
def get_line_net_convnext_fpn(num_classes=91):
    """Build a ``LineNet`` on the ConvNeXt + FPN backbone from ``backbone_factory``.

    The anchor generator is obtained from ``get_anchor_generator`` using a
    random 1x3x224x224 probe input; RoI pooling covers the feature maps
    ``'0', '1', '2', '3', 'pool'``.

    Args:
        num_classes: number of box classes (defaults to 91, the COCO count
            used elsewhere in this module).
    """
    backbone = get_convnext_fpn()

    pooler_config = dict(
        featmap_names=['0', '1', '2', '3', 'pool'],
        output_size=7,
        sampling_ratio=2,
    )
    roi_pooler = MultiScaleRoIAlign(**pooler_config)

    probe = torch.rand(1, 3, 224, 224)
    anchor_generator = get_anchor_generator(backbone, probe)

    return LineNet(
        backbone=backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
    )
class LineNet_ResNet50_FPN_Weights(WeightsEnum):
    """Weight entries for the ResNet-50 FPN variant.

    ``COCO_V1`` points at the torchvision Faster R-CNN ResNet-50 FPN COCO
    checkpoint URL; ``DEFAULT`` is an alias of ``COCO_V1``.
    """

    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
        transforms=ObjectDetection,
        meta=dict(
            _COMMON_META,
            num_params=41755286,
            recipe="https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn",
            _metrics={"COCO-val2017": {"box_map": 37.0}},
            _ops=134.38,
            _file_size=159.743,
            _docs="""These weights were produced by following a similar training recipe as on the paper.""",
        ),
    )
    DEFAULT = COCO_V1
class LineNet_ResNet50_FPN_V2_Weights(WeightsEnum):
    """Weight entries for the ResNet-50 FPN v2 variant.

    ``COCO_V1`` points at the torchvision Faster R-CNN ResNet-50 FPN v2 COCO
    checkpoint URL; ``DEFAULT`` is an alias of ``COCO_V1``.
    """

    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth",
        transforms=ObjectDetection,
        meta=dict(
            _COMMON_META,
            num_params=43712278,
            recipe="https://github.com/pytorch/vision/pull/5763",
            _metrics={"COCO-val2017": {"box_map": 46.7}},
            _ops=280.371,
            _file_size=167.104,
            _docs="""These weights were produced using an enhanced training recipe to boost the model accuracy.""",
        ),
    )
    DEFAULT = COCO_V1
class LineNet_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
    """Weight entries for the MobileNetV3-Large FPN variant.

    ``COCO_V1`` points at the torchvision Faster R-CNN MobileNetV3-Large FPN
    checkpoint URL; ``DEFAULT`` is an alias of ``COCO_V1``.
    """

    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth",
        transforms=ObjectDetection,
        meta=dict(
            _COMMON_META,
            num_params=19386354,
            recipe="https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn",
            _metrics={"COCO-val2017": {"box_map": 32.8}},
            _ops=4.494,
            _file_size=74.239,
            _docs="""These weights were produced by following a similar training recipe as on the paper.""",
        ),
    )
    DEFAULT = COCO_V1
class LineNet_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
    """Weight entries for the low-resolution MobileNetV3-Large 320 FPN variant.

    ``COCO_V1`` points at the torchvision Faster R-CNN MobileNetV3-Large 320
    FPN checkpoint URL; ``DEFAULT`` is an alias of ``COCO_V1``.
    """

    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth",
        transforms=ObjectDetection,
        meta=dict(
            _COMMON_META,
            num_params=19386354,
            recipe="https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn",
            _metrics={"COCO-val2017": {"box_map": 22.8}},
            _ops=0.719,
            _file_size=74.239,
            _docs="""These weights were produced by following a similar training recipe as on the paper.""",
        ),
    )
    DEFAULT = COCO_V1
def linenet_newresnet18fpn(
    *,
    weights: Optional[LineNet_ResNet50_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[ResNet18_Weights] = ResNet18_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> LineNet:
    """Build a ``LineNet`` on the high-resolution ``resnet18fpn`` backbone.

    Args:
        weights: optional detector weights; when given, the state dict is
            loaded into the model and ``num_classes`` must agree with the
            number of categories in the weights' metadata.
        progress: show a progress bar while downloading ``weights``.
        num_classes: number of box classes; defaults to 91 when neither
            ``weights`` nor ``num_classes`` is given.
        weights_backbone: only used to decide whether the model counts as
            pretrained; ``resnet18fpn()`` is called without arguments, so
            these weights are not loaded by this function.
        trainable_backbone_layers: validated through
            ``_validate_trainable_layers``; the result is not applied to the
            ``resnet18fpn`` backbone.
        **kwargs: forwarded to ``LineNet``. An explicit
            ``rpn_anchor_generator`` entry takes precedence over the
            generator built here.

    Returns:
        The constructed ``LineNet``.
    """
    if weights is not None:
        weights_backbone = None
        num_classes = _ovewrite_value_param("num_classes", num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    if weights_backbone is not None:
        print('resnet50 weights is not None')

    is_trained = weights is not None or weights_backbone is not None
    # Kept for its argument validation; the returned value is unused because
    # the backbone below is built without a trainable-layers argument.
    _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)

    backbone = resnet18fpn()

    # NOTE(review): assumes resnet18fpn() yields these five feature maps — confirm.
    featmap_names = ['0', '1', '2', '3', 'pool']
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=featmap_names,
        output_size=7,
        sampling_ratio=2
    )

    # One anchor size per feature level, doubling from 16; shared aspect ratios.
    num_features = len(featmap_names)
    anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features))
    aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
    anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)

    # LineNet's parameter is ``rpn_anchor_generator``. The previous
    # ``anchor_generator=`` keyword is not a LineNet parameter and fell into
    # its **kwargs, so the generator built above never reached the RPN.
    # setdefault keeps a caller-supplied ``rpn_anchor_generator`` working.
    kwargs.setdefault("rpn_anchor_generator", anchor_generator)

    model = LineNet(backbone, num_classes=num_classes, box_roi_pool=roi_pooler, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
        if weights == LineNet_ResNet50_FPN_Weights.COCO_V1:
            overwrite_eps(model, 0.0)

    return model
def linenet_newresnet50fpn(
    *,
    weights: Optional[LineNet_ResNet50_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[ResNet18_Weights] = ResNet18_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> LineNet:
    """Build a ``LineNet`` on the high-resolution ``resnet50fpn`` backbone.

    Args:
        weights: optional detector weights; when given, the state dict is
            loaded into the model and ``num_classes`` must agree with the
            number of categories in the weights' metadata.
        progress: show a progress bar while downloading ``weights``.
        num_classes: number of box classes; defaults to 91 when neither
            ``weights`` nor ``num_classes`` is given.
        weights_backbone: only used to decide whether the model counts as
            pretrained; ``resnet50fpn()`` is called without arguments, so
            these weights are not loaded by this function. (The ResNet-18
            default is kept for interface compatibility.)
        trainable_backbone_layers: validated through
            ``_validate_trainable_layers``; the result is not applied to the
            ``resnet50fpn`` backbone.
        **kwargs: forwarded to ``LineNet``. An explicit
            ``rpn_anchor_generator`` entry takes precedence over the
            generator built here.

    Returns:
        The constructed ``LineNet``.
    """
    if weights is not None:
        weights_backbone = None
        num_classes = _ovewrite_value_param("num_classes", num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    if weights_backbone is not None:
        print('resnet50 weights is not None')

    is_trained = weights is not None or weights_backbone is not None
    # Kept for its argument validation; the returned value is unused because
    # the backbone below is built without a trainable-layers argument.
    _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)

    backbone = resnet50fpn()

    # NOTE(review): assumes resnet50fpn() yields these five feature maps — confirm.
    featmap_names = ['0', '1', '2', '3', 'pool']
    roi_pooler = MultiScaleRoIAlign(
        featmap_names=featmap_names,
        output_size=7,
        sampling_ratio=2
    )

    # One anchor size per feature level, doubling from 16; shared aspect ratios.
    num_features = len(featmap_names)
    anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features))
    aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
    anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)

    # LineNet's parameter is ``rpn_anchor_generator``. The previous
    # ``anchor_generator=`` keyword is not a LineNet parameter and fell into
    # its **kwargs, so the generator built above never reached the RPN.
    # setdefault keeps a caller-supplied ``rpn_anchor_generator`` working.
    kwargs.setdefault("rpn_anchor_generator", anchor_generator)

    model = LineNet(backbone, num_classes=num_classes, box_roi_pool=roi_pooler, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
        if weights == LineNet_ResNet50_FPN_Weights.COCO_V1:
            overwrite_eps(model, 0.0)

    return model
- # @register_model()
- # @handle_legacy_interface(
- # weights=("pretrained", LineNet_ResNet50_FPN_Weights.COCO_V1),
- # weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
- # )
def linenet_resnet18_fpn(
    *,
    weights: Optional[LineNet_ResNet50_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[ResNet18_Weights] = ResNet18_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> LineNet:
    """Build a ``LineNet`` with a ResNet-18 + FPN backbone.

    Args:
        weights: optional detector weights to load into the finished model.
        progress: show a progress bar while downloading weights.
        num_classes: number of box classes; 91 when neither ``weights`` nor
            ``num_classes`` is given.
        weights_backbone: weights for the ResNet-18 trunk; ignored (set to
            None) when ``weights`` is given.
        trainable_backbone_layers: passed through
            ``_validate_trainable_layers`` and then to ``_resnet_fpn_extractor``.
        **kwargs: forwarded to ``LineNet``.

    Returns:
        The constructed ``LineNet``.
    """
    has_detector_weights = weights is not None

    if has_detector_weights:
        # Detector weights already contain the backbone parameters.
        weights_backbone = None
        category_count = len(weights.meta["categories"])
        num_classes = _ovewrite_value_param("num_classes", num_classes, category_count)
    elif num_classes is None:
        num_classes = 91

    if weights_backbone is not None:
        print('resnet50 weights is not None')

    is_trained = has_detector_weights or weights_backbone is not None
    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)

    # Pretrained trunks use frozen batch norm; otherwise regular batch norm.
    if is_trained:
        norm_layer = misc_nn_ops.FrozenBatchNorm2d
    else:
        norm_layer = nn.BatchNorm2d

    trunk = resnet18(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
    fpn_backbone = _resnet_fpn_extractor(trunk, trainable_backbone_layers)
    model = LineNet(fpn_backbone, num_classes=num_classes, **kwargs)

    if has_detector_weights:
        state = weights.get_state_dict(progress=progress, check_hash=True)
        model.load_state_dict(state)
        if weights == LineNet_ResNet50_FPN_Weights.COCO_V1:
            overwrite_eps(model, 0.0)

    return model
def linenet_resnet50_fpn(
    *,
    weights: Optional[LineNet_ResNet50_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> LineNet:
    """Build a ``LineNet`` with a ResNet-50 + FPN backbone.

    This factory mirrors torchvision's ``fasterrcnn_resnet50_fpn`` (Faster
    R-CNN, https://arxiv.org/abs/1506.01497) but constructs a ``LineNet``.
    The input/output conventions below are inherited from that builder's
    documentation — NOTE(review): confirm them against ``BaseDetectionNet``
    and ``RoIHeads``, which add a line branch.

    The model expects a list of image tensors of shape ``[C, H, W]`` in the
    ``0-1`` range; images may differ in size. In training mode it additionally
    expects a list of target dicts with ``boxes`` (``FloatTensor[N, 4]`` in
    ``[x1, y1, x2, y2]`` format) and ``labels`` (``Int64Tensor[N]``).

    Args:
        weights: optional detector weights. When given, backbone weights are
            not loaded separately and ``num_classes`` must agree with the
            number of categories in the weights' metadata. By default no
            detector weights are used.
        progress: show a progress bar while downloading weights.
        num_classes: number of output classes (including the background);
            91 when neither ``weights`` nor ``num_classes`` is given.
        weights_backbone: pretrained weights for the ResNet-50 trunk.
        trainable_backbone_layers: number of trainable (not frozen) layers
            counted from the final block, validated with a maximum of 5 and a
            default of 3 by ``_validate_trainable_layers``.
        **kwargs: forwarded to ``LineNet``.

    Returns:
        The constructed ``LineNet``.
    """
    weights = LineNet_ResNet50_FPN_Weights.verify(weights)
    weights_backbone = ResNet50_Weights.verify(weights_backbone)

    has_detector_weights = weights is not None

    if has_detector_weights:
        # Detector weights already contain the backbone parameters.
        weights_backbone = None
        category_count = len(weights.meta["categories"])
        num_classes = _ovewrite_value_param("num_classes", num_classes, category_count)
    elif num_classes is None:
        num_classes = 91

    if weights_backbone is not None:
        print('resnet50 weights is not None')

    is_trained = has_detector_weights or weights_backbone is not None
    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)

    # Pretrained trunks use frozen batch norm; otherwise regular batch norm.
    if is_trained:
        norm_layer = misc_nn_ops.FrozenBatchNorm2d
    else:
        norm_layer = nn.BatchNorm2d

    trunk = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
    fpn_backbone = _resnet_fpn_extractor(trunk, trainable_backbone_layers)
    model = LineNet(fpn_backbone, num_classes=num_classes, **kwargs)

    if has_detector_weights:
        state = weights.get_state_dict(progress=progress, check_hash=True)
        model.load_state_dict(state)
        if weights == LineNet_ResNet50_FPN_Weights.COCO_V1:
            overwrite_eps(model, 0.0)

    return model
- # @register_model()
- # @handle_legacy_interface(
- # weights=("pretrained", LineNet_ResNet50_FPN_V2_Weights.COCO_V1),
- # weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
- # )
def linenet_resnet50_fpn_v2(
    *,
    weights: Optional[LineNet_ResNet50_FPN_V2_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[ResNet50_Weights] = None,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> LineNet:
    """Build the "v2" ``LineNet`` with a ResNet-50 + FPN backbone.

    This factory mirrors torchvision's ``fasterrcnn_resnet50_fpn_v2``
    (recipe from "Benchmarking Detection Transfer Learning with Vision
    Transformers", https://arxiv.org/abs/2111.11429). Compared with
    ``linenet_resnet50_fpn`` it uses ``nn.BatchNorm2d`` in the FPN, an RPN
    head with ``conv_depth=2`` and a ``LineNetConvFCHead`` box head
    (four 256-channel conv blocks followed by one 1024-wide FC layer).

    Args:
        weights: optional detector weights. When given, backbone weights are
            not loaded separately and ``num_classes`` must agree with the
            number of categories in the weights' metadata.
        progress: show a progress bar while downloading weights.
        num_classes: number of output classes (including the background);
            91 when neither ``weights`` nor ``num_classes`` is given.
        weights_backbone: pretrained weights for the ResNet-50 trunk.
        trainable_backbone_layers: number of trainable (not frozen) layers
            counted from the final block, validated with a maximum of 5 and a
            default of 3 by ``_validate_trainable_layers``.
        **kwargs: forwarded to ``LineNet``.

    Returns:
        The constructed ``LineNet``.
    """
    weights = LineNet_ResNet50_FPN_V2_Weights.verify(weights)
    weights_backbone = ResNet50_Weights.verify(weights_backbone)

    has_detector_weights = weights is not None

    if has_detector_weights:
        # Detector weights already contain the backbone parameters.
        weights_backbone = None
        category_count = len(weights.meta["categories"])
        num_classes = _ovewrite_value_param("num_classes", num_classes, category_count)
    elif num_classes is None:
        num_classes = 91

    is_trained = has_detector_weights or weights_backbone is not None
    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)

    trunk = resnet50(weights=weights_backbone, progress=progress)
    backbone = _resnet_fpn_extractor(trunk, trainable_backbone_layers, norm_layer=nn.BatchNorm2d)

    rpn_anchor_generator = _default_anchorgen()
    anchors_per_location = rpn_anchor_generator.num_anchors_per_location()[0]
    rpn_head = RPNHead(backbone.out_channels, anchors_per_location, conv_depth=2)

    box_head_input = (backbone.out_channels, 7, 7)
    box_head = LineNetConvFCHead(
        box_head_input, [256, 256, 256, 256], [1024], norm_layer=nn.BatchNorm2d
    )

    model = LineNet(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=rpn_anchor_generator,
        rpn_head=rpn_head,
        box_head=box_head,
        **kwargs,
    )

    if has_detector_weights:
        state = weights.get_state_dict(progress=progress, check_hash=True)
        model.load_state_dict(state)

    return model
def _linenet_mobilenet_v3_large_fpn(
    *,
    weights: Optional[Union[LineNet_MobileNet_V3_Large_FPN_Weights, LineNet_MobileNet_V3_Large_320_FPN_Weights]],
    progress: bool,
    num_classes: Optional[int],
    weights_backbone: Optional[MobileNet_V3_Large_Weights],
    trainable_backbone_layers: Optional[int],
    **kwargs: Any,
) -> LineNet:
    """
    Shared builder behind the public MobileNetV3-Large FPN ``LineNet`` constructors.

    Args:
        weights: already-verified weights for the whole model, or ``None``. When
            given, ``weights_backbone`` is ignored and ``num_classes`` is reconciled
            with the number of categories in ``weights.meta["categories"]``.
        progress: forwarded to the weight download helpers (progress bar on/off).
        num_classes: number of output classes; falls back to 91 when both this
            and ``weights`` are ``None``.
        weights_backbone: already-verified weights for the MobileNetV3-Large
            backbone, or ``None``.
        trainable_backbone_layers: requested number of trainable backbone layers;
            validated through ``_validate_trainable_layers`` with 6 and 3 as its
            trailing arguments.
        **kwargs: extra keyword arguments forwarded unchanged to ``LineNet``.

    Returns:
        LineNet: the assembled model, with ``weights`` loaded when they were provided.
    """
    has_full_weights = weights is not None
    if has_full_weights:
        # The full checkpoint is loaded into the whole model further down, so a
        # separate backbone checkpoint is not requested.
        weights_backbone = None
        num_classes = _ovewrite_value_param("num_classes", num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    is_trained = has_full_weights or weights_backbone is not None
    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 6, 3)

    # Pretrained setups use frozen batch-norm layers; training from scratch
    # uses regular BatchNorm2d.
    if is_trained:
        norm_layer = misc_nn_ops.FrozenBatchNorm2d
    else:
        norm_layer = nn.BatchNorm2d

    mobilenet = mobilenet_v3_large(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
    fpn_backbone = _mobilenet_extractor(mobilenet, True, trainable_backbone_layers)

    # The same five anchor sizes and three aspect ratios are repeated for each
    # of the three anchor groups.
    sizes_per_group = (32, 64, 128, 256, 512)
    ratios_per_group = (0.5, 1.0, 2.0)
    anchor_sizes = (sizes_per_group,) * 3
    aspect_ratios = (ratios_per_group,) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

    model = LineNet(fpn_backbone, num_classes, rpn_anchor_generator=anchor_generator, **kwargs)

    if has_full_weights:
        state_dict = weights.get_state_dict(progress=progress, check_hash=True)
        model.load_state_dict(state_dict)

    return model
- # @register_model()
- # @handle_legacy_interface(
- # weights=("pretrained", LineNet_MobileNet_V3_Large_320_FPN_Weights.COCO_V1),
- # weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
- # )
def linenet_mobilenet_v3_large_320_fpn(
    *,
    weights: Optional[LineNet_MobileNet_V3_Large_320_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> LineNet:
    """
    Low resolution :class:`LineNet` model with a MobileNetV3-Large FPN backbone,
    tuned for mobile use cases.

    Unless overridden through ``**kwargs``, the model is built with
    ``min_size=320``, ``max_size=640``, ``rpn_pre_nms_top_n_test=150``,
    ``rpn_post_nms_top_n_test=150`` and ``rpn_score_thresh=0.05``.

    Example::

        >>> model = linenet_mobilenet_v3_large_320_fpn(weights=LineNet_MobileNet_V3_Large_320_FPN_Weights.COCO_V1)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        weights (:class:`LineNet_MobileNet_V3_Large_320_FPN_Weights`, optional): The
            pretrained weights to use for the whole model. By default, no
            pre-trained weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        num_classes (int, optional): number of output classes of the model
            (including the background)
        weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional):
            The pretrained weights for the backbone. Defaults to
            ``MobileNet_V3_Large_Weights.IMAGENET1K_V1``.
        trainable_backbone_layers (int, optional): number of trainable (not frozen)
            layers starting from final block. Valid values are between 0 and 6, with
            6 meaning all backbone layers are trainable. If ``None`` is passed (the
            default) this value is set to 3.
        **kwargs: extra keyword arguments forwarded to the ``LineNet`` constructor;
            they take precedence over the low-resolution defaults listed above.

    Returns:
        LineNet: the assembled model.
    """
    weights = LineNet_MobileNet_V3_Large_320_FPN_Weights.verify(weights)
    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)

    # Start from the low-resolution presets, then let caller-supplied values win.
    model_kwargs = {
        "min_size": 320,
        "max_size": 640,
        "rpn_pre_nms_top_n_test": 150,
        "rpn_post_nms_top_n_test": 150,
        "rpn_score_thresh": 0.05,
    }
    model_kwargs.update(kwargs)

    return _linenet_mobilenet_v3_large_fpn(
        weights=weights,
        progress=progress,
        num_classes=num_classes,
        weights_backbone=weights_backbone,
        trainable_backbone_layers=trainable_backbone_layers,
        **model_kwargs,
    )
- # @register_model()
- # @handle_legacy_interface(
- # weights=("pretrained", LineNet_MobileNet_V3_Large_FPN_Weights.COCO_V1),
- # weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
- # )
def linenet_mobilenet_v3_large_fpn(
    *,
    weights: Optional[LineNet_MobileNet_V3_Large_FPN_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> LineNet:
    """
    Constructs a high resolution :class:`LineNet` model with a MobileNetV3-Large
    FPN backbone.

    Unless overridden through ``**kwargs``, the model is built with
    ``rpn_score_thresh=0.05``.

    Example::

        >>> model = linenet_mobilenet_v3_large_fpn(weights=LineNet_MobileNet_V3_Large_FPN_Weights.COCO_V1)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        weights (:class:`LineNet_MobileNet_V3_Large_FPN_Weights`, optional): The
            pretrained weights to use for the whole model. By default, no
            pre-trained weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        num_classes (int, optional): number of output classes of the model
            (including the background)
        weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional):
            The pretrained weights for the backbone. Defaults to
            ``MobileNet_V3_Large_Weights.IMAGENET1K_V1``.
        trainable_backbone_layers (int, optional): number of trainable (not frozen)
            layers starting from final block. Valid values are between 0 and 6, with
            6 meaning all backbone layers are trainable. If ``None`` is passed (the
            default) this value is set to 3.
        **kwargs: extra keyword arguments forwarded to the ``LineNet`` constructor;
            they take precedence over the default listed above.

    Returns:
        LineNet: the assembled model.
    """
    weights = LineNet_MobileNet_V3_Large_FPN_Weights.verify(weights)
    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)

    # Start from the preset score threshold, then let caller-supplied values win.
    model_kwargs = {"rpn_score_thresh": 0.05}
    model_kwargs.update(kwargs)

    return _linenet_mobilenet_v3_large_fpn(
        weights=weights,
        progress=progress,
        num_classes=num_classes,
        weights_backbone=weights_backbone,
        trainable_backbone_layers=trainable_backbone_layers,
        **model_kwargs,
    )
|