
Change the high-resolution ResNet to 5 levels

lstrlq 5 months ago
parent commit c708caed2e

+ 22 - 20
models/base/high_reso_resnet.py

@@ -8,7 +8,7 @@ from typing import Any, Callable, List, Optional, Type, Union
 from torchvision.models.detection.backbone_utils import BackboneWithFPN
 
 # ----------------------------
-# Utility functions
+# Utility functions
 # ----------------------------
 
 def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
@@ -30,7 +30,7 @@ def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
 
 # ----------------------------
-# Bottleneck Block (as provided)
+# Bottleneck Block (as provided)
 # ----------------------------
 
 class Bottleneck(nn.Module):
@@ -85,22 +85,22 @@ class Bottleneck(nn.Module):
         return out
 
 # ----------------------------
-# ResNet main class
+# ResNet main class
 # ----------------------------
 
 def resnet18fpn(out_channels=256):
-    backbone = ResNet(Bottleneck,[2,2,2])
+    backbone = ResNet(Bottleneck,[2,2,2,2])
     return_layers = {
         'encoder0': '0',
         'encoder1': '1',
         'encoder2': '2',
         'encoder3': '3',
-        # 'encoder4': '5'
+        'encoder4': '4'
     }
 
     # in_channels_list = [self.inplanes, 64, 128, 256, 512]
-    # in_channels_list = [64, 256, 512, 1024, 2048]
-    in_channels_list = [64, 256, 512, 1024]
+    in_channels_list = [64, 256, 512, 1024, 2048]
+    # in_channels_list = [64, 256, 512, 1024]
 
     return BackboneWithFPN(
         backbone,
@@ -110,18 +110,18 @@ def resnet18fpn(out_channels=256):
     )
 
 def resnet50fpn(out_channels=256):
-    backbone = ResNet(Bottleneck,[3,4,6])
+    backbone = ResNet(Bottleneck,[3,4,6,3])
     return_layers = {
         'encoder0': '0',
         'encoder1': '1',
         'encoder2': '2',
         'encoder3': '3',
-        # 'encoder4': '5'
+        'encoder4': '4'
     }
 
     # in_channels_list = [self.inplanes, 64, 128, 256, 512]
-    # in_channels_list = [64, 256, 512, 1024, 2048]
-    in_channels_list = [64, 256, 512, 1024]
+    in_channels_list = [64, 256, 512, 1024, 2048]
+    # in_channels_list = [64, 256, 512, 1024]
 
     return BackboneWithFPN(
         backbone,
@@ -150,6 +150,7 @@ class ResNet(nn.Module):
         self.encoder1 = self._make_layer(block, 64, layers[0],stride=2)
         self.encoder2 = self._make_layer(block, 128, layers[1], stride=2)
         self.encoder3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.encoder4 = self._make_layer(block, 512, layers[3], stride=2)
 
 
 
@@ -193,7 +194,7 @@ class ResNet(nn.Module):
     def _make_decoder_layer(self, block: Type[Union[Bottleneck]], in_channels: int,
                             out_channels: int, blocks: int = 1) -> nn.Sequential:
         """
-        Build the residual blocks for the decoder part
+        Build the residual blocks for the decoder part
         """
         assert in_channels == out_channels, "in_channels must equal out_channels"
         layers = []
@@ -212,7 +213,7 @@ class ResNet(nn.Module):
 
     def _make_upsample_layer(self, in_channels: int, out_channels: int) -> nn.Module:
         """
-        Upsample with a transposed convolution
+        Upsample with a transposed convolution
         """
         return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
 
@@ -228,14 +229,14 @@ class ResNet(nn.Module):
         print(f'x2:{x2.shape}')
         x3= self.encoder3(x2)
         print(f'x3:{x3.shape}')
-        # x4= self.encoder4(x3)
-        # print(f'x4:{x4.shape}')
+        x4= self.encoder4(x3)
+        print(f'x4:{x4.shape}')
         out={
             'encoder0':x0,
             'encoder1': x1,
             'encoder2': x2,
             'encoder3': x3,
-            # 'encoder4': x4,
+            'encoder4': x4,
         }
         return out
 
@@ -245,20 +246,21 @@ class ResNet(nn.Module):
 
 
 # ----------------------------
-# Test code
+# Test code
 # ----------------------------
 
 if __name__ == "__main__":
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     # model = ResNet(Bottleneck, n_classes=5).to(device)
     # print(model)
-    model=resnet50fpn().to(device)
+    # model=resnet50fpn().to(device)
+    model = resnet18fpn().to(device)
 
 
     input_tensor = torch.randn(1, 3, 512, 512).to(device)
     output_tensor = model(input_tensor)
 
-    backbone = ResNet(Bottleneck,[3,4,6]).to(device)
+    backbone = ResNet(Bottleneck,[3,4,6,3]).to(device)
     features = backbone(input_tensor)
     print("Raw backbone output:", list(features.keys()))
     print(f"Input shape: {input_tensor.shape}")
@@ -267,5 +269,5 @@ if __name__ == "__main__":
     print(f"Output shape1: {output_tensor['1'].shape}")
     print(f"Output shape2: {output_tensor['2'].shape}")
     print(f"Output shape3: {output_tensor['3'].shape}")
-    # print(f"Output shape4: {output_tensor['5'].shape}")
+    print(f"Output shape4: {output_tensor['4'].shape}")
     print(f"Output shape5: {output_tensor['pool'].shape}")

+ 4 - 4
models/line_detect/line_dataset.py

@@ -168,10 +168,10 @@ def get_boxes_lines(objs,shape):
 
         elif label =='point':
              p= obj['points'][0]
-             xmin=max(0,p[0]-6)
-             xmax = min(w, p[0] +6)
-             ymin=max(0,p[1]-6)
-             ymax = min(h, p[1] + 6)
+             xmin=max(0,p[0]-12)
+             xmax = min(w, p[0] +12)
+             ymin=max(0,p[1]-12)
+             ymax = min(h, p[1] + 12)
 
              points.append(p)
              labels.append(torch.tensor(1))
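
Note: the point branch now expands each labeled point into a 24x24 pseudo-box (half-width 12 instead of 6), clamped to the image bounds. A standalone sketch of the same logic (point_to_box is a hypothetical helper, not part of the repo):

def point_to_box(p, w, h, half=12):
    # p is (x, y); returns (xmin, ymin, xmax, ymax) clipped to a w-by-h image
    xmin = max(0, p[0] - half)
    xmax = min(w, p[0] + half)
    ymin = max(0, p[1] - half)
    ymax = min(h, p[1] + half)
    return xmin, ymin, xmax, ymax

# A point near the left edge of a 512x512 image gets a clipped box:
print(point_to_box((5, 100), 512, 512))  # (0, 88, 17, 112)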

+ 39 - 2
models/line_detect/line_detect.py

@@ -75,7 +75,7 @@ class LineDetect(BaseDetectionNet):
             box_predictor=None,
             box_score_thresh=0.05,
             box_nms_thresh=0.5,
-            box_detections_per_img=100,
+            box_detections_per_img=200,
             box_fg_iou_thresh=0.5,
             box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512,
@@ -305,7 +305,44 @@ def linedetect_newresnet18fpn(
 
 
     backbone =resnet18fpn()
-    featmap_names=['0', '1', '2', '3','pool']
+    featmap_names=['0', '1', '2', '3','4','pool']
+    # print(f'featmap_names:{featmap_names}')
+    roi_pooler = MultiScaleRoIAlign(
+        featmap_names=featmap_names,
+        output_size=7,
+        sampling_ratio=2
+    )
+    num_features=len(featmap_names)
+    anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features))  # automatically generate a different size per level
+    # print(f'anchor_sizes:{anchor_sizes}')
+    aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
+    # print(f'aspect_ratios:{aspect_ratios}')
+
+
+    anchor_generator =  AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
+
+    model = LineDetect(backbone, num_classes, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler, **kwargs)
+
+    return model
+
+def linedetect_newresnet50fpn(
+        *,
+
+        num_classes: Optional[int] = None,
+        num_points:Optional[int] = None,
+
+        **kwargs: Any,
+) -> LineDetect:
+    # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
+    # weights_backbone = ResNet50_Weights.verify(weights_backbone)
+    if num_classes is None:
+        num_classes = 3
+    if num_points is None:
+        num_points = 3
+
+
+    backbone =resnet50fpn()
+    featmap_names=['0', '1', '2', '3','4','pool']
     # print(f'featmap_names:{featmap_names}')
     roi_pooler = MultiScaleRoIAlign(
         featmap_names=featmap_names,

+ 5 - 3
models/line_detect/train_demo.py

@@ -1,7 +1,7 @@
 import torch
 
-from models.line_detect.line_detect import linedetect_newresnet18fpn, linedetect_resnet50_fpn, linedetect_resnet18_fpn
-
+from models.line_detect.line_detect import linedetect_newresnet18fpn, linedetect_resnet50_fpn, linedetect_resnet18_fpn, \
+    linedetect_newresnet50fpn
 
 from models.line_net.trainer import Trainer
 
@@ -16,6 +16,8 @@ if __name__ == '__main__':
     # model = lineDetect_resnet18_fpn()
 
     # model=linedetect_resnet18_fpn()
-    model=linedetect_newresnet18fpn(num_points=3)
+    # model=linedetect_newresnet18fpn(num_points=3)
+    model = linedetect_newresnet50fpn(num_points=3)
+    # model.load_weights(save_path=r'/home/admin/projects/MultiVisionModels/models/line_detect/train_results/20250711_114046/weights/best_val.pth')
 
     model.start_train(cfg='train.yaml')
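
Note: before kicking off start_train, a forward-pass smoke test can catch shape mismatches introduced by the new 5-level FPN. A minimal sketch, assuming LineDetect follows the torchvision detector convention of taking a list of image tensors in eval mode (not confirmed by this diff):

import torch

from models.line_detect.line_detect import linedetect_newresnet50fpn

model = linedetect_newresnet50fpn(num_classes=3, num_points=3)
model.eval()
with torch.no_grad():
    preds = model([torch.randn(3, 512, 512)])  # assumed list-of-images input
print(preds)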