@@ -1,12 +1,22 @@
-from libs.vision_libs.models import mobilenet_v3_large
+from collections import OrderedDict
+
+from libs.vision_libs import models
+from libs.vision_libs.models import mobilenet_v3_large, EfficientNet_V2_S_Weights, efficientnet_v2_s, \
+    EfficientNet_V2_M_Weights, efficientnet_v2_m, EfficientNet_V2_L_Weights, efficientnet_v2_l
from libs.vision_libs.models._utils import _ovewrite_value_param, handle_legacy_interface
+from libs.vision_libs.models.detection import FasterRCNN
+from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
from libs.vision_libs.models.detection.ssdlite import _mobilenet_extractor
from libs.vision_libs.models.resnet import resnet50, ResNet50_Weights, resnet18
from libs.vision_libs.models.detection._utils import overwrite_eps
from libs.vision_libs.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
-from libs.vision_libs.ops import misc as misc_nn_ops
+from libs.vision_libs.ops import misc as misc_nn_ops, MultiScaleRoIAlign
from torch import nn

+import torch
+from torchvision.models.detection.backbone_utils import BackboneWithFPN, resnet_fpn_backbone
+from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork, LastLevelMaxPool
+

def get_resnet50_fpn():
    is_trained = False
@@ -16,6 +26,7 @@ def get_resnet50_fpn():
    backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
    return backbone

+
def get_resnet18_fpn():
    is_trained = False
    trainable_backbone_layers = _validate_trainable_layers(is_trained, None, 5, 3)
@@ -24,11 +35,152 @@ def get_resnet18_fpn():
    backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
    return backbone

+
def get_mobilenet_v3_large_fpn():
-    is_trained =False
+    is_trained = False
    trainable_backbone_layers = _validate_trainable_layers(is_trained, None, 6, 3)
    norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

    backbone = mobilenet_v3_large(weights=None, progress=True, norm_layer=norm_layer)
    backbone = _mobilenet_extractor(backbone, True, trainable_backbone_layers)
-    return backbone
+    return backbone
+def get_convnext_fpn():
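+    # Wrap the ConvNeXt-Base feature extractor with an FPN. In torchvision's
+    # convnext_base, features[1]/[3]/[5]/[7] are the four ConvNeXt stages, whose
+    # outputs have 128/256/512/1024 channels (hence in_channels_list below).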
+    convnext = models.convnext_base(pretrained=True)
+    in_channels_list = [128, 256, 512, 1024]
+    backbone_with_fpn = BackboneWithFPN(
+        convnext.features,
+        return_layers={'1': '0', '3': '1', '5': '2', '7': '3'},  # make sure these keys map to real layers in convnext.features
+        in_channels_list=in_channels_list,
+        out_channels=256
+    )
+    return backbone_with_fpn
+
+def get_efficientnetv2_fpn(name='efficientnet_v2_m', pretrained=True):
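+    # Build an EfficientNetV2 (s/m/l) feature extractor and wrap it with an FPN;
+    # the FPN input channel counts are read from the selected feature blocks below.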
+    # load the EfficientNetV2 model
+    if name == 'efficientnet_v2_s':
+        weights = EfficientNet_V2_S_Weights.IMAGENET1K_V1 if pretrained else None
+        backbone = efficientnet_v2_s(weights=weights).features
+    elif name == 'efficientnet_v2_m':
+        weights = EfficientNet_V2_M_Weights.IMAGENET1K_V1 if pretrained else None
+        backbone = efficientnet_v2_m(weights=weights).features
+    elif name == 'efficientnet_v2_l':
+        weights = EfficientNet_V2_L_Weights.IMAGENET1K_V1 if pretrained else None
+        backbone = efficientnet_v2_l(weights=weights).features
+    else:
+        raise ValueError(f"Unsupported EfficientNetV2 variant: {name}")
+
+    # define which feature blocks to return and their output names
+    return_layers = {"2": "0", "3": "1", "4": "2", "5": "3"}
+
+    # collect the output channel count of each selected block
+    in_channels_list = []
+    for layer_idx in [2, 3, 4, 5]:
+        module = backbone[layer_idx]
+        if hasattr(module, 'out_channels'):
+            in_channels_list.append(module.out_channels)
+        elif hasattr(module[-1], 'out_channels'):
+            # if the module itself has no out_channels, check its last submodule
+            in_channels_list.append(module[-1].out_channels)
+        else:
+            raise ValueError(f"Cannot determine out_channels for layer {layer_idx}")
+
+    # wrap the backbone with BackboneWithFPN
+    backbone_with_fpn = BackboneWithFPN(
+        backbone=backbone,
+        return_layers=return_layers,
+        in_channels_list=in_channels_list,
+        out_channels=256
+    )
+
+    return backbone_with_fpn
+
+
+
+# load the ConvNeXt model
+convnext = models.convnext_base(pretrained=True)
+# convnext = models.convnext_tiny(pretrained=True)
+# convnext = models.convnext_small(pretrained=True)
+# print(convnext)
+# # print all named layers of the model
+# for name, _ in convnext.features[5].named_children():
+#     print(name)
+
+# adapt ConvNeXt for Faster R-CNN
+
+def get_anchor_generator(backbone, test_input):
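+    # Probe the backbone with a dummy forward pass to count the FPN output levels,
+    # then build one anchor size tuple per level (AnchorGenerator expects one sizes
+    # entry per feature map).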
+    features = backbone(test_input)  # get all feature maps produced by the backbone
+    featmap_names = list(features.keys())
+    print(f'featmap_names:{featmap_names}')
+    num_features = len(features)  # number of feature maps
+    print(f'num_features:{num_features}')
+    # num_features=num_features-1
+
+    # # define anchor sizes and aspect ratios for each level
+    # base_sizes = [32, 64, 128]  # supports at most 4 levels
+    # sizes = tuple((size,) for size in base_sizes[:num_features])
+
+
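+    # one size tuple per pyramid level, doubling at each level: (16,), (32,), (64,), ...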
+    anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features))  # automatically generate a different size per level
+    print(f'anchor_sizes:{anchor_sizes}')
+    aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
+    print(f'aspect_ratios:{aspect_ratios}')
+
+    return AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
+
+if __name__ == '__main__':
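+    # Smoke test: build a ConvNeXt+FPN backbone, derive an anchor generator from its
+    # outputs, assemble a Faster R-CNN, and run a single forward pass in eval mode.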
+    # build the ConvNeXt backbone
+    convnext = models.convnext_base(pretrained=True)
+    for i, layer in enumerate(convnext.features):
+        print(f'layer{i}:{layer}')
+    # create a small input tensor to probe each layer's output channels
+    dummy_input = torch.randn(1, 3, 224, 224)
+    # output_channels_list = get_output_channels(convnext.features, dummy_input)
+    # print(f'output_channels_list:{output_channels_list}')
+
+    # based on the inspection above, pick suitable layer indices
+    selected_layers = [3, 5, 7]  # assumed layer indices to use as FPN inputs
+    in_channels_list = [128, 256, 512, 1024]
+    print(f'in_channels_list:{in_channels_list}')
+
+    # build the FPN
+    backbone_with_fpn = BackboneWithFPN(
+        convnext.features,
+        return_layers={'1': '0', '3': '1', '5': '2', '7': '3'},  # make sure these keys map to real layers in convnext.features
+        in_channels_list=in_channels_list,
+        out_channels=256
+    )
+
+    # build the Faster R-CNN model
+    # anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+    #                                    aspect_ratios=((0.5, 1.0, 2.0),))
+    # anchor_generator = AnchorGenerator(
+    #     sizes=((32,), (64,), (128,)),  # ✅ correct
+    #     aspect_ratios=((0.5, 1.0, 2.0),) * 3  # ✅ correct
+    # )
+    test_input = torch.rand(1, 3, 224, 224)
+    anchor_generator = get_anchor_generator(backbone_with_fpn, test_input)
+    print(f'anchor_generator:{anchor_generator}')
+
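+    # These names must match the FPN output keys; BackboneWithFPN adds an extra 'pool'
+    # level via LastLevelMaxPool, so there are five maps here. For comparison, torchvision's
+    # stock fasterrcnn_resnet50_fpn pools box features only from ['0', '1', '2', '3'].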
+    featmap_names = ['0', '1', '2', '3', 'pool']
+    roi_pooler = MultiScaleRoIAlign(
+        featmap_names=featmap_names,
+        output_size=7,
+        sampling_ratio=2
+    )
+
+    model = FasterRCNN(
+        backbone=backbone_with_fpn,
+        num_classes=91,  # the COCO dataset has 91 classes
+        rpn_anchor_generator=anchor_generator,
+        box_roi_pool=roi_pooler
+    )
+
+    # test the model
+    test_input = torch.randn(1, 3, 800, 800)  # note: the input size should meet Faster R-CNN's requirements
+    model.eval()
+    output = model(test_input)
+    print(f'output: {output}')