
Change the high-resolution ResNet to 5 levels

lstrlq 5 months ago
parent commit c708caed2e

+ 22 - 20
models/base/high_reso_resnet.py

@@ -8,7 +8,7 @@ from typing import Any, Callable, List, Optional, Type, Union
 from torchvision.models.detection.backbone_utils import BackboneWithFPN
 
 # ----------------------------
-# Utility functions
+# Utility functions
 # ----------------------------
 
 def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
@@ -30,7 +30,7 @@ def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
 
 # ----------------------------
-# Bottleneck Block (as provided)
+# Bottleneck Block (as provided)
 # ----------------------------
 
 class Bottleneck(nn.Module):
@@ -85,22 +85,22 @@ class Bottleneck(nn.Module):
         return out
 
 # ----------------------------
-# ResNet main class
+# ResNet main class
 # ----------------------------
 
 def resnet18fpn(out_channels=256):
-    backbone = ResNet(Bottleneck,[2,2,2])
+    backbone = ResNet(Bottleneck,[2,2,2,2])
     return_layers = {
         'encoder0': '0',
         'encoder1': '1',
         'encoder2': '2',
         'encoder3': '3',
-        # 'encoder4': '5'
+        'encoder4': '4'
     }
 
     # in_channels_list = [self.inplanes, 64, 128, 256, 512]
-    # in_channels_list = [64, 256, 512, 1024, 2048]
-    in_channels_list = [64, 256, 512, 1024]
+    in_channels_list = [64, 256, 512, 1024, 2048]
+    # in_channels_list = [64, 256, 512, 1024]
 
     return BackboneWithFPN(
         backbone,
@@ -110,18 +110,18 @@ def resnet18fpn(out_channels=256):
     )
 
 def resnet50fpn(out_channels=256):
-    backbone = ResNet(Bottleneck,[3,4,6])
+    backbone = ResNet(Bottleneck,[3,4,6,3])
     return_layers = {
         'encoder0': '0',
         'encoder1': '1',
         'encoder2': '2',
         'encoder3': '3',
-        # 'encoder4': '5'
+        'encoder4': '4'
     }
 
     # in_channels_list = [self.inplanes, 64, 128, 256, 512]
-    # in_channels_list = [64, 256, 512, 1024, 2048]
-    in_channels_list = [64, 256, 512, 1024]
+    in_channels_list = [64, 256, 512, 1024, 2048]
+    # in_channels_list = [64, 256, 512, 1024]
 
     return BackboneWithFPN(
         backbone,
@@ -150,6 +150,7 @@ class ResNet(nn.Module):
         self.encoder1 = self._make_layer(block, 64, layers[0],stride=2)
         self.encoder2 = self._make_layer(block, 128, layers[1], stride=2)
         self.encoder3 = self._make_layer(block, 256, layers[2], stride=2)
+        self.encoder4 = self._make_layer(block, 512, layers[3], stride=2)
 
 
 
@@ -193,7 +194,7 @@ class ResNet(nn.Module):
     def _make_decoder_layer(self, block: Type[Union[Bottleneck]], in_channels: int,
                             out_channels: int, blocks: int = 1) -> nn.Sequential:
         """
-        Build the residual blocks for the decoder part
+        Build the residual blocks for the decoder part
         """
         assert in_channels == out_channels, "in_channels must equal out_channels"
         layers = []
@@ -212,7 +213,7 @@ class ResNet(nn.Module):
 
     def _make_upsample_layer(self, in_channels: int, out_channels: int) -> nn.Module:
         """
-        Upsample with a transposed convolution
+        Upsample with a transposed convolution
         """
         return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
 
@@ -228,14 +229,14 @@ class ResNet(nn.Module):
         print(f'x2:{x2.shape}')
         x3= self.encoder3(x2)
         print(f'x3:{x3.shape}')
-        # x4= self.encoder4(x3)
-        # print(f'x4:{x4.shape}')
+        x4= self.encoder4(x3)
+        print(f'x4:{x4.shape}')
         out={
             'encoder0':x0,
             'encoder1': x1,
             'encoder2': x2,
             'encoder3': x3,
-            # 'encoder4': x4,
+            'encoder4': x4,
         }
         return out
 
@@ -245,20 +246,21 @@ class ResNet(nn.Module):
 
 
 # ----------------------------
-# Test code
+# Test code
 # ----------------------------
 
 if __name__ == "__main__":
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     # model = ResNet(Bottleneck, n_classes=5).to(device)
     # print(model)
-    model=resnet50fpn().to(device)
+    # model=resnet50fpn().to(device)
+    model = resnet18fpn().to(device)
 
 
     input_tensor = torch.randn(1, 3, 512, 512).to(device)
     output_tensor = model(input_tensor)
 
-    backbone = ResNet(Bottleneck,[3,4,6]).to(device)
+    backbone = ResNet(Bottleneck,[3,4,6,3]).to(device)
     features = backbone(input_tensor)
     print("Raw backbone output:", list(features.keys()))
     print(f"Input shape: {input_tensor.shape}")
@@ -267,5 +269,5 @@ if __name__ == "__main__":
     print(f"Output shape1: {output_tensor['1'].shape}")
     print(f"Output shape2: {output_tensor['2'].shape}")
     print(f"Output shape3: {output_tensor['3'].shape}")
-    # print(f"Output shape4: {output_tensor['5'].shape}")
+    print(f"Output shape4: {output_tensor['4'].shape}")
     print(f"Output shape5: {output_tensor['pool'].shape}")

+ 4 - 4
models/line_detect/line_dataset.py

@@ -168,10 +168,10 @@ def get_boxes_lines(objs,shape):
 
         elif label =='point':
              p= obj['points'][0]
-             xmin=max(0,p[0]-6)
-             xmax = min(w, p[0] +6)
-             ymin=max(0,p[1]-6)
-             ymax = min(h, p[1] + 6)
+             xmin=max(0,p[0]-12)
+             xmax = min(w, p[0] +12)
+             ymin=max(0,p[1]-12)
+             ymax = min(h, p[1] + 12)
 
              points.append(p)
              labels.append(torch.tensor(1))
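
Note: the point branch now expands each labeled point into a 24x24 pseudo-box (half-width 12 instead of 6), clamped to the image bounds. A standalone sketch of the same logic (point_to_box is a hypothetical helper, not part of the repo):

def point_to_box(p, w, h, half=12):
    # p is (x, y); returns (xmin, ymin, xmax, ymax) clipped to a w-by-h image
    xmin = max(0, p[0] - half)
    xmax = min(w, p[0] + half)
    ymin = max(0, p[1] - half)
    ymax = min(h, p[1] + half)
    return xmin, ymin, xmax, ymax

# A point near the left edge of a 512x512 image gets a clipped box:
print(point_to_box((5, 100), 512, 512))  # (0, 88, 17, 112)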

+ 39 - 2
models/line_detect/line_detect.py

@@ -75,7 +75,7 @@ class LineDetect(BaseDetectionNet):
             box_predictor=None,
             box_score_thresh=0.05,
             box_nms_thresh=0.5,
-            box_detections_per_img=100,
+            box_detections_per_img=200,
             box_fg_iou_thresh=0.5,
             box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512,
@@ -305,7 +305,44 @@ def linedetect_newresnet18fpn(
 
 
     backbone =resnet18fpn()
-    featmap_names=['0', '1', '2', '3','pool']
+    featmap_names=['0', '1', '2', '3','4','pool']
+    # print(f'featmap_names:{featmap_names}')
+    roi_pooler = MultiScaleRoIAlign(
+        featmap_names=featmap_names,
+        output_size=7,
+        sampling_ratio=2
+    )
+    num_features=len(featmap_names)
+    anchor_sizes = tuple((int(16 * 2 ** i),) for i in range(num_features))  # automatically generate a different size per level
+    # print(f'anchor_sizes:{anchor_sizes}')
+    aspect_ratios = ((0.5, 1.0, 2.0),) * num_features
+    # print(f'aspect_ratios:{aspect_ratios}')
+
+
+    anchor_generator =  AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
+
+    model = LineDetect(backbone, num_classes, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler, **kwargs)
+
+    return model
+
+def linedetect_newresnet50fpn(
+        *,
+
+        num_classes: Optional[int] = None,
+        num_points:Optional[int] = None,
+
+        **kwargs: Any,
+) -> LineDetect:
+    # weights = LineNet_ResNet50_FPN_Weights.verify(weights)
+    # weights_backbone = ResNet50_Weights.verify(weights_backbone)
+    if num_classes is None:
+        num_classes = 3
+    if num_points is None:
+        num_points = 3
+
+
+    backbone =resnet50fpn()
+    featmap_names=['0', '1', '2', '3','4','pool']
     # print(f'featmap_names:{featmap_names}')
     roi_pooler = MultiScaleRoIAlign(
         featmap_names=featmap_names,

+ 5 - 3
models/line_detect/train_demo.py

@@ -1,7 +1,7 @@
 import torch
 
-from models.line_detect.line_detect import linedetect_newresnet18fpn, linedetect_resnet50_fpn, linedetect_resnet18_fpn
-
+from models.line_detect.line_detect import linedetect_newresnet18fpn, linedetect_resnet50_fpn, linedetect_resnet18_fpn, \
+    linedetect_newresnet50fpn
 
 from models.line_net.trainer import Trainer
 
@@ -16,6 +16,8 @@ if __name__ == '__main__':
     # model = lineDetect_resnet18_fpn()
 
     # model=linedetect_resnet18_fpn()
-    model=linedetect_newresnet18fpn(num_points=3)
+    # model=linedetect_newresnet18fpn(num_points=3)
+    model = linedetect_newresnet50fpn(num_points=3)
+    # model.load_weights(save_path=r'/home/admin/projects/MultiVisionModels/models/line_detect/train_results/20250711_114046/weights/best_val.pth')
 
     model.start_train(cfg='train.yaml')
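
Note: before kicking off start_train, a forward-pass smoke test can catch shape mismatches introduced by the new 5-level FPN. A minimal sketch, assuming LineDetect follows the torchvision detector convention of taking a list of image tensors in eval mode (not confirmed by this diff):

import torch

from models.line_detect.line_detect import linedetect_newresnet50fpn

model = linedetect_newresnet50fpn(num_classes=3, num_points=3)
model.eval()
with torch.no_grad():
    preds = model([torch.randn(3, 512, 512)])  # assumed list-of-images input
print(preds)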