@@ -18,7 +18,9 @@ from libs.vision_libs.ops import misc as misc_nn_ops, MultiScaleRoIAlign
 from torch import nn
 import torch
-# from libs.vision_libs.models.detection.backbone_utils import BackboneWithFPN
+
+
+
 def get_resnet50_fpn():
@@ -188,8 +190,7 @@ class MaxVitBackbone(torch.nn.Module):
 from torchvision.models.feature_extraction import create_feature_extractor
-
-if __name__ == '__main__':
+def get_swin_transformer_fpn(type='t'):
     class Trans(nn.Module):
         def __init__(self):
             super().__init__()
@@ -199,10 +200,16 @@ if __name__ == '__main__':
     class SwinTransformer(nn.Module):
-        def __init__(self):
+        def __init__(self, type='t'):
             super().__init__()
-            # Load Swin Transformer v2 Tiny
             swin = torchvision.models.swin_v2_t(weights=None)
+            if type == 't':
+                # Load Swin Transformer v2 Tiny
+                swin = torchvision.models.swin_v2_t(weights=None)
+            if type == 's':
+                swin = torchvision.models.swin_v2_s(weights=None)
+            if type == 'b':
+                swin = torchvision.models.swin_v2_b(weights=None)
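+            # NOTE: the unconditional swin_v2_t(...) above acts as the default,
+            # so an unrecognized `type` silently falls back to the tiny model.
+            # A stricter, equivalent sketch (hypothetical `builders` dict over
+            # the same torchvision constructors) would be:
+            #   builders = {'t': torchvision.models.swin_v2_t,
+            #               's': torchvision.models.swin_v2_s,
+            #               'b': torchvision.models.swin_v2_b}
+            #   swin = builders[type](weights=None)  # KeyError on a bad type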

             # keep the layers whose outputs we will extract
             self.patch_embed = swin.features[0]  # layer 0: patch embedding
@@ -215,54 +222,22 @@ if __name__ == '__main__':
             self.layer7 = nn.Sequential(swin.features[7], Trans())  # layer 7: stage 4

         def forward(self, x):
-            outputs = {}
-            # Patch Embedding
             x = self.patch_embed(x)  # [B, C, H, W] -> [B, H_, W_, C]
-
-            # Layer 1: stage1
             x = self.layer1(x)
-
-            # if 'feat1' not in outputs:
-            #     feat = x.permute(0, 3, 1, 2).contiguous()  # NHWC -> NCHW
-            #     outputs['feat1'] = feat
             print(f'x1:{x.shape}')
-            # Downsample 1
             x = self.layer2(x)
-
-            # Layer 2: stage2
             x = self.layer3(x)
-            # if 'feat2' not in outputs:
-            #     feat = x.permute(0, 3, 1, 2).contiguous()
-            #     outputs['feat2'] = feat
             print(f'x2:{x.shape}')
-
-            # Downsample 2
             x = self.layer4(x)
-
-            # Layer 3: stage3
             x = self.layer5(x)
             print(f'x3:{x.shape}')
-
-            # if 'feat3' not in outputs:
-            #     feat = x.permute(0, 3, 1, 2).contiguous()
-            #     outputs['feat3'] = feat
-
-            # Downsample 3
             x = self.layer6(x)
-
-            # Layer 4: stage4
             x = self.layer7(x)
-            # x = x.permute(0, 3, 2, 1).contiguous()
-
-            # if 'feat4' not in outputs:
-            #     feat = x.permute(0, 3, 1, 2).contiguous()
-            #     outputs['feat4'] = feat
             print(f'x4:{x.shape}')
-
             return x

-    backbone = SwinTransformer()
+    backbone = SwinTransformer(type=type)
     input = torch.randn(1, 3, 512, 512)
     out = backbone(input)
     # print(f'out:{out.keys()}')
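+    # Smoke test: with a 512x512 input and the 4x patch embedding, the four
+    # printed stage maps should halve each time: 128, 64, 32, 16 px per side.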
@@ -271,18 +246,42 @@ if __name__ == '__main__':
     # out = swin(input)
     # print(f'out shape:{out.shape}')
     #
+
+    channels_list = [96, 192, 384, 768]
+    if type == 't':
+        channels_list = [96, 192, 384, 768]
+    if type == 's':
+        channels_list = [96, 192, 384, 768]
+    if type == 'b':
+        channels_list = [128, 256, 512, 1024]
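+    # These are the per-stage embedding widths (C, 2C, 4C, 8C) of torchvision's
+    # Swin V2 models: C = 96 for swin_v2_t and swin_v2_s, C = 128 for swin_v2_b,
+    # which is why the 't' and 's' branches share the same list.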
     backbone_with_fpn = BackboneWithFPN(
         # swin.features,
         backbone,
         return_layers={'layer1': '0', 'layer3': '1', 'layer5': '2', 'layer7': '3'},
-        in_channels_list=[96, 192, 384, 768],
+        in_channels_list=channels_list,
         out_channels=256
     )
+    featmap_names = ['0', '1', '2', '3', 'pool']
+    # print(f'featmap_names:{featmap_names}')
+    roi_pooler = MultiScaleRoIAlign(
+        featmap_names=featmap_names,
+        output_size=7,
+        sampling_ratio=2
+    )
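+    # Assumption (matches torchvision's backbone_utils): BackboneWithFPN adds a
+    # LastLevelMaxPool output keyed 'pool' on top of the four mapped stages, so
+    # featmap_names lists five keys; torchvision's stock FasterRCNN pooler only
+    # uses ['0', '1', '2', '3'].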

-    out = backbone_with_fpn(input)
+    # out = backbone_with_fpn(input)
+    anchor_generator = get_anchor_generator(backbone_with_fpn, test_input=input)

-    print(f'out:{out}')
+    # print(f'out:{out}')
+    return backbone_with_fpn, roi_pooler, anchor_generator
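+    # get_anchor_generator is assumed to be a helper defined elsewhere in this
+    # file; presumably it runs test_input through the FPN to discover the output
+    # keys/shapes and builds a matching AnchorGenerator.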

+if __name__ == '__main__':
+    backbone_with_fpn, roi_pooler, anchor_generator = get_swin_transformer_fpn(type='s')
+    model = FasterRCNN(backbone=backbone_with_fpn, num_classes=3,
+                       box_roi_pool=roi_pooler, rpn_anchor_generator=anchor_generator)
+    # A 4-D batch is split into per-image tensors by the detection transform;
+    # a list of [C, H, W] tensors is the documented input format.
+    input = torch.randn(3, 3, 512, 512, device='cuda')
+    model.eval()
+    model.to('cuda')
+    out = model(input)
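+    # Sketch: in eval mode, torchvision's FasterRCNN returns one dict per image
+    # with 'boxes', 'labels' and 'scores' tensors.
+    for det in out:
+        print(det['boxes'].shape, det['scores'])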

 # # maxvit = models.maxvit_t(pretrained=True)