@@ -8,7 +8,7 @@ from typing import Any, Callable, List, Optional, Type, Union
from torchvision.models.detection.backbone_utils import BackboneWithFPN

# ----------------------------
-# Utility functions
+# Utility functions
# ----------------------------

def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
@@ -30,7 +30,7 @@ def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)

# ----------------------------
-# Bottleneck Block (provided by you)
+# Bottleneck Block (provided by you)
# ----------------------------

class Bottleneck(nn.Module):
@@ -85,22 +85,22 @@ class Bottleneck(nn.Module):
        return out

# ----------------------------
-# ResNet main class
+# ResNet main class
# ----------------------------

def resnet18fpn(out_channels=256):
-    backbone = ResNet(Bottleneck,[2,2,2])
+    backbone = ResNet(Bottleneck,[2,2,2,2])
    return_layers = {
        'encoder0': '0',
        'encoder1': '1',
        'encoder2': '2',
        'encoder3': '3',
-        # 'encoder4': '5'
+        'encoder4': '4'
    }

    # in_channels_list = [self.inplanes, 64, 128, 256, 512]
-    # in_channels_list = [64, 256, 512, 1024, 2048]
-    in_channels_list = [64, 256, 512, 1024]
+    in_channels_list = [64, 256, 512, 1024, 2048]
+    # in_channels_list = [64, 256, 512, 1024]

    return BackboneWithFPN(
        backbone,
@@ -110,18 +110,18 @@ def resnet18fpn(out_channels=256):
    )

def resnet50fpn(out_channels=256):
-    backbone = ResNet(Bottleneck,[3,4,6])
+    backbone = ResNet(Bottleneck,[3,4,6,3])
    return_layers = {
        'encoder0': '0',
        'encoder1': '1',
        'encoder2': '2',
        'encoder3': '3',
-        # 'encoder4': '5'
+        'encoder4': '4'
    }

    # in_channels_list = [self.inplanes, 64, 128, 256, 512]
-    # in_channels_list = [64, 256, 512, 1024, 2048]
-    in_channels_list = [64, 256, 512, 1024]
+    in_channels_list = [64, 256, 512, 1024, 2048]
+    # in_channels_list = [64, 256, 512, 1024]

    return BackboneWithFPN(
        backbone,
@@ -150,6 +150,7 @@ class ResNet(nn.Module):
        self.encoder1 = self._make_layer(block, 64, layers[0],stride=2)
        self.encoder2 = self._make_layer(block, 128, layers[1], stride=2)
        self.encoder3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.encoder4 = self._make_layer(block, 512, layers[3], stride=2)
@@ -193,7 +194,7 @@ class ResNet(nn.Module):
    def _make_decoder_layer(self, block: Type[Union[Bottleneck]], in_channels: int,
                            out_channels: int, blocks: int = 1) -> nn.Sequential:
        """
-        Build the residual blocks for the decoder part
+        Build the residual blocks for the decoder part
        """
        assert in_channels == out_channels, "in_channels must equal out_channels"
        layers = []
@@ -212,7 +213,7 @@ class ResNet(nn.Module):
    def _make_upsample_layer(self, in_channels: int, out_channels: int) -> nn.Module:
        """
-        Upsample using a transposed convolution
+        Upsample using a transposed convolution
        """
        return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
@@ -228,14 +229,14 @@ class ResNet(nn.Module):
        print(f'x2:{x2.shape}')
        x3= self.encoder3(x2)
        print(f'x3:{x3.shape}')
-        # x4= self.encoder4(x3)
-        # print(f'x4:{x4.shape}')
+        x4= self.encoder4(x3)
+        print(f'x4:{x4.shape}')
        out={
            'encoder0':x0,
            'encoder1': x1,
            'encoder2': x2,
            'encoder3': x3,
-            # 'encoder4': x4,
+            'encoder4': x4,
        }
        return out
@@ -245,20 +246,21 @@ class ResNet(nn.Module):

# ----------------------------
-# Test code
+# Test code
# ----------------------------

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # model = ResNet(Bottleneck, n_classes=5).to(device)
    # print(model)
-    model=resnet50fpn().to(device)
+    # model=resnet50fpn().to(device)
+    model = resnet18fpn().to(device)

    input_tensor = torch.randn(1, 3, 512, 512).to(device)
    output_tensor = model(input_tensor)

-    backbone = ResNet(Bottleneck,[3,4,6]).to(device)
+    backbone = ResNet(Bottleneck,[3,4,6,3]).to(device)
    features = backbone(input_tensor)
    print("Raw backbone output:", list(features.keys()))
    print(f"Input shape: {input_tensor.shape}")
@@ -267,5 +269,5 @@ if __name__ == "__main__":
    print(f"Output shape1: {output_tensor['1'].shape}")
    print(f"Output shape2: {output_tensor['2'].shape}")
    print(f"Output shape3: {output_tensor['3'].shape}")
-    # print(f"Output shape4: {output_tensor['5'].shape}")
+    print(f"Output shape4: {output_tensor['4'].shape}")
    print(f"Output shape5: {output_tensor['pool'].shape}")
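
For reference, a minimal smoke test of the five-level backbone this diff enables. It is only a sketch under two assumptions that are not part of the patch: the edited file is importable as resnet_fpn (the module name is a guess; adjust it to the real filename), and Bottleneck.expansion == 4 as in the torchvision implementation, so the encoder stages emit 64, 256, 512, 1024 and 2048 channels, which is what the new in_channels_list records. BackboneWithFPN adds a LastLevelMaxPool level when no extra_blocks are given, so the output keys should be '0' through '4' plus 'pool', each carrying out_channels channels.

# smoke_test_fpn.py -- hypothetical helper, not part of the diff
import torch
from resnet_fpn import resnet18fpn, resnet50fpn  # assumed module name

if __name__ == "__main__":
    for build in (resnet18fpn, resnet50fpn):
        model = build(out_channels=256).eval()
        with torch.no_grad():
            feats = model(torch.randn(1, 3, 512, 512))
        # BackboneWithFPN returns an OrderedDict; every level, including the
        # extra 'pool' level, should carry out_channels (= 256) channels.
        for name, feat in feats.items():
            assert feat.shape[1] == 256, (name, feat.shape)
        print(build.__name__, {k: tuple(v.shape) for k, v in feats.items()})

If the assertions pass, the channel bookkeeping in in_channels_list matches what the FPN actually receives from the five encoder stages.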