| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185 |
- from collections import OrderedDict
- from libs.vision_libs import models
- from libs.vision_libs.models import mobilenet_v3_large, EfficientNet_V2_S_Weights, efficientnet_v2_s, \
- EfficientNet_V2_M_Weights, efficientnet_v2_m, EfficientNet_V2_L_Weights, efficientnet_v2_l, ConvNeXt_Base_Weights
- from libs.vision_libs.models._utils import _ovewrite_value_param, handle_legacy_interface
- from libs.vision_libs.models.detection import FasterRCNN
- from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
- from libs.vision_libs.models.detection.ssdlite import _mobilenet_extractor
- from libs.vision_libs.models.resnet import resnet50, ResNet50_Weights, resnet18
- from libs.vision_libs.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
- from libs.vision_libs.ops import misc as misc_nn_ops, MultiScaleRoIAlign
- from torch import nn
- import torch
- from libs.vision_libs.models.detection.backbone_utils import BackboneWithFPN
def get_resnet50_fpn():
    """Build a randomly initialised ResNet-50 trunk wrapped by the FPN extractor.

    No pretrained weights are loaded (``weights=None``), so regular
    ``nn.BatchNorm2d`` layers are used instead of the frozen variant.

    Returns:
        The module produced by ``_resnet_fpn_extractor`` for the ResNet-50 trunk.
    """
    pretrained = False
    # Arguments are (is_trained, requested, 5, 3); presumably the last two are
    # the max/default layer counts -- confirm against backbone_utils.
    num_trainable = _validate_trainable_layers(pretrained, None, 5, 3)
    if pretrained:
        bn_layer = misc_nn_ops.FrozenBatchNorm2d
    else:
        bn_layer = nn.BatchNorm2d
    trunk = resnet50(weights=None, progress=True, norm_layer=bn_layer)
    return _resnet_fpn_extractor(trunk, num_trainable)
def get_resnet18_fpn():
    """Build a randomly initialised ResNet-18 trunk wrapped by the FPN extractor.

    No pretrained weights are loaded (``weights=None``), so regular
    ``nn.BatchNorm2d`` layers are used instead of the frozen variant.

    Returns:
        The module produced by ``_resnet_fpn_extractor`` for the ResNet-18 trunk.
    """
    pretrained = False
    # Arguments are (is_trained, requested, 5, 3); presumably the last two are
    # the max/default layer counts -- confirm against backbone_utils.
    num_trainable = _validate_trainable_layers(pretrained, None, 5, 3)
    if pretrained:
        bn_layer = misc_nn_ops.FrozenBatchNorm2d
    else:
        bn_layer = nn.BatchNorm2d
    trunk = resnet18(weights=None, progress=True, norm_layer=bn_layer)
    return _resnet_fpn_extractor(trunk, num_trainable)
def get_mobilenet_v3_large_fpn():
    """Build a randomly initialised MobileNetV3-Large trunk wrapped with an FPN.

    No pretrained weights are loaded (``weights=None``), so regular
    ``nn.BatchNorm2d`` layers are used instead of the frozen variant.

    Returns:
        The FPN-wrapped backbone produced by the Faster R-CNN style
        ``_mobilenet_extractor`` helper.
    """
    # The call below uses the (backbone, fpn, trainable_layers) argument order
    # of the helper in ``backbone_utils``.  The module-level import binds the
    # SSDLite helper of the same name, whose upstream signature is
    # (backbone, trainable_layers, norm_layer); with it, ``True`` would be read
    # as the layer count and the integer as the norm layer.
    # NOTE(review): assumes the vendored ``libs.vision_libs`` mirrors upstream
    # torchvision here -- confirm.
    from libs.vision_libs.models.detection.backbone_utils import (
        _mobilenet_extractor as _mobilenet_fpn_extractor,
    )

    is_trained = False
    trainable_backbone_layers = _validate_trainable_layers(is_trained, None, 6, 3)
    norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
    backbone = mobilenet_v3_large(weights=None, progress=True, norm_layer=norm_layer)
    # Second positional argument enables the FPN on top of the trunk.
    return _mobilenet_fpn_extractor(backbone, True, trainable_backbone_layers)
def get_convnext_fpn():
    """Wrap the feature stack of a pretrained ConvNeXt-Base model with an FPN.

    The default ConvNeXt-Base weights are loaded, and the outputs of
    ``features`` children ``1``, ``3``, ``5`` and ``7`` are fed to the FPN as
    levels ``0``-``3``.

    Returns:
        A ``BackboneWithFPN`` producing 256-channel feature maps.
    """
    base_model = models.convnext_base(weights=ConvNeXt_Base_Weights.DEFAULT)

    # Keys must name real children of ``base_model.features``; values are the
    # output names exposed to the FPN.
    tapped_layers = {str(2 * level + 1): str(level) for level in range(4)}
    # Channel widths of the four tapped stages.  These are specific to the
    # base variant; swapping in another ConvNeXt size requires new values.
    stage_widths = [128, 256, 512, 1024]

    return BackboneWithFPN(
        base_model.features,
        return_layers=tapped_layers,
        in_channels_list=stage_widths,
        out_channels=256,
    )
def get_efficientnetv2_fpn(name='efficientnet_v2_m', pretrained=True):
    """Wrap the feature stack of an EfficientNetV2 model with an FPN.

    Args:
        name: One of ``'efficientnet_v2_s'``, ``'efficientnet_v2_m'`` or
            ``'efficientnet_v2_l'``.
        pretrained: When true, load the ``IMAGENET1K_V1`` weights of the chosen
            variant; otherwise the model is created without weights.

    Returns:
        A ``BackboneWithFPN`` that taps ``features`` children 2-5 and produces
        256-channel feature maps.

    Raises:
        ValueError: If ``name`` is not a supported variant, or if the output
            channel count of a tapped stage cannot be determined.
    """
    supported = ('efficientnet_v2_s', 'efficientnet_v2_m', 'efficientnet_v2_l')
    # Reject unknown names up front; previously they fell through every branch
    # and failed later with an unbound ``backbone`` variable.
    if name not in supported:
        raise ValueError(
            f"Unsupported EfficientNetV2 variant {name!r}; expected one of {supported}"
        )

    # Load the requested EfficientNetV2 model.
    if name == 'efficientnet_v2_s':
        builder, weights_enum = efficientnet_v2_s, EfficientNet_V2_S_Weights
    elif name == 'efficientnet_v2_m':
        builder, weights_enum = efficientnet_v2_m, EfficientNet_V2_M_Weights
    else:
        builder, weights_enum = efficientnet_v2_l, EfficientNet_V2_L_Weights
    weights = weights_enum.IMAGENET1K_V1 if pretrained else None
    backbone = builder(weights=weights).features

    # Child index inside ``features`` -> name of the FPN input level.
    return_layers = {"2": "0", "3": "1", "4": "2", "5": "3"}

    # Collect the output channel count of each tapped stage.  The indices are
    # derived from ``return_layers`` so the two can never drift apart.
    in_channels_list = []
    for layer_name in return_layers:
        stage = backbone[int(layer_name)]
        if hasattr(stage, 'out_channels'):
            in_channels_list.append(stage.out_channels)
        elif hasattr(stage[-1], 'out_channels'):
            # The stage itself has no out_channels; use its last sub-module.
            in_channels_list.append(stage[-1].out_channels)
        else:
            raise ValueError(f"Cannot determine out_channels for layer {layer_name}")

    # Wrap the feature stack with the FPN.
    return BackboneWithFPN(
        backbone=backbone,
        return_layers=return_layers,
        in_channels_list=in_channels_list,
        out_channels=256,
    )
# Load the ConvNeXt-Base model with its default pretrained weights, to be
# adapted for Faster R-CNN.  The ``weights=`` enum replaces the legacy
# ``pretrained=True`` flag and matches how get_convnext_fpn loads the model.
# NOTE(review): this runs at import time, and the resulting ``convnext`` is not
# referenced by the definitions visible in this part of the file -- confirm
# whether it is still needed.
convnext = models.convnext_base(weights=ConvNeXt_Base_Weights.DEFAULT)
def get_anchor_generator(backbone, test_input):
    """Create an ``AnchorGenerator`` sized to the feature maps of ``backbone``.

    The backbone is run once on ``test_input`` to discover how many feature
    maps it returns.  Each map gets one anchor size (16, 32, 64, ... doubling
    per level) and the aspect ratios 0.5, 1.0 and 2.0.  The discovered names
    and the generated settings are printed.

    Args:
        backbone: Callable returning a mapping of feature-map name to tensor.
        test_input: Sample input passed to ``backbone`` for the probe pass.

    Returns:
        An ``AnchorGenerator`` with one size tuple and one ratio tuple per
        feature map.
    """
    # Probe pass: collect every feature map the backbone emits.
    features = backbone(test_input)
    featmap_names = [*features.keys()]
    print(f'featmap_names:{featmap_names}')

    num_features = len(features)
    print(f'num_features:{num_features}')

    # One single-element size tuple per level, doubling from 16.
    anchor_sizes = tuple((16 * 2 ** level,) for level in range(num_features))
    print(f'anchor_sizes:{anchor_sizes}')

    # The same three ratios are used on every level.
    aspect_ratios = tuple((0.5, 1.0, 2.0) for _ in range(num_features))
    print(f'aspect_ratios:{aspect_ratios}')

    return AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
class MaxVitBackbone(torch.nn.Module):
    """Feature extractor built from the stem and first four blocks of MaxViT-T.

    The sub-modules are registered as ``stem``, ``block0`` ... ``block3`` in
    that order, so they can be addressed by name from outside the class.
    """

    def __init__(self):
        super().__init__()
        # Take only the parts of the pretrained MaxViT needed for features.
        source = models.maxvit_t(pretrained=True)
        stages = source.blocks
        self.stem = source.stem  # stem layer
        self.block0 = stages[0]
        self.block1 = stages[1]
        self.block2 = stages[2]
        self.block3 = stages[3]

    def forward(self, x):
        """Run ``x`` through the stem and the four blocks; return the last output."""
        pipeline = (self.stem, self.block0, self.block1, self.block2, self.block3)
        for stage in pipeline:
            x = stage(x)
        return x
if __name__ == '__main__':
    # Smoke test: wrap the MaxViT trunk with an FPN, build a Faster R-CNN on
    # top of it, and push one random image through both.
    vit_trunk = MaxVitBackbone()
    for position, child in enumerate(vit_trunk.named_children()):
        print(f'layer:{position}:{child}')

    # Keys must match the attribute names registered on MaxVitBackbone.
    stage_names = ('stem', 'block0', 'block1', 'block2', 'block3')
    layer_map = {stage: str(level) for level, stage in enumerate(stage_names)}
    stage_channels = [64, 64, 128, 256, 512]
    backbone_with_fpn = BackboneWithFPN(
        vit_trunk,
        return_layers=layer_map,
        in_channels_list=stage_channels,
        out_channels=256,
    )

    # NOTE(review): the default anchor generator and RoI pooler are used here.
    # If the FPN emits more feature maps than the default anchor sizes cover,
    # the forward pass below may fail -- confirm, or pass
    # rpn_anchor_generator / box_roi_pool explicitly.
    model = FasterRCNN(
        backbone=backbone_with_fpn,
        num_classes=91,  # the COCO dataset has 91 classes
    )

    test_input = torch.randn(1, 3, 896, 896)

    # Inspect the FPN outputs without tracking gradients.
    with torch.no_grad():
        pyramid = backbone_with_fpn(test_input)
    print("Output feature maps:")
    for level_name, feature in pyramid.items():
        print(f"{level_name}: {feature.shape}")

    # Full detector forward pass in inference mode.
    model.eval()
    detections = model(test_input)
|