
Debug training code: set parameters to 512 resolution

lstrlq 6 months ago
parent
commit
0c68613b41

+ 0 - 774
models/line_detect/.ipynb_checkpoints/line_net-checkpoint.py

@@ -1,774 +0,0 @@
-from typing import Any, Callable, List, Optional, Tuple, Union
-import torch
-from torch import nn
-from torchvision.ops import MultiScaleRoIAlign
-
-from libs.vision_libs.models import MobileNet_V3_Large_Weights, mobilenet_v3_large
-from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
-from libs.vision_libs.models.detection.rpn import RPNHead, RegionProposalNetwork
-from libs.vision_libs.models.detection.ssdlite import _mobilenet_extractor
-from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
-from libs.vision_libs.ops import misc as misc_nn_ops
-from libs.vision_libs.transforms._presets import ObjectDetection
-from .line_head import LineRCNNHeads
-from .line_predictor import LineRCNNPredictor
-from libs.vision_libs.models._api import register_model, Weights, WeightsEnum
-from libs.vision_libs.models._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES, _COCO_CATEGORIES
-from libs.vision_libs.models._utils import _ovewrite_value_param, handle_legacy_interface
-from libs.vision_libs.models.resnet import resnet50, ResNet50_Weights
-from libs.vision_libs.models.detection._utils import overwrite_eps
-from libs.vision_libs.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
-from libs.vision_libs.models.detection.faster_rcnn import FasterRCNN, TwoMLPHead, FastRCNNPredictor
-
-from .roi_heads import RoIHeads
-from .trainer import Trainer
-from ..base import backbone_factory
-from ..base.base_detection_net import BaseDetectionNet
-import torch.nn.functional as F
-
-from ..config.config_tool import read_yaml
-
-FEATURE_DIM = 8
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-__all__ = [
-    "LineNet",
-    "LineNet_ResNet50_FPN_Weights",
-    "LineNet_ResNet50_FPN_V2_Weights",
-    "LineNet_MobileNet_V3_Large_FPN_Weights",
-    "LineNet_MobileNet_V3_Large_320_FPN_Weights",
-    "linenet_resnet50_fpn",
-    "linenet_resnet50_fpn_v2",
-    "linenet_mobilenet_v3_large_fpn",
-    "linenet_mobilenet_v3_large_320_fpn",
-]
-
-
-def _default_anchorgen():
-    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
-    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
-    return AnchorGenerator(anchor_sizes, aspect_ratios)
-
-
-class LineNet(BaseDetectionNet):
-    def __init__(self, cfg, **kwargs):
-        cfg = read_yaml(cfg)
-        self.cfg = cfg
-        backbone = cfg['backbone']
-        print(f'LineNet Backbone:{backbone}')
-        num_classes = cfg['num_classes']
-
-        if backbone == 'resnet50_fpn':
-            backbone = backbone_factory.get_resnet50_fpn()
-            print(f'out_channels:{backbone.out_channels}')
-        elif backbone == 'mobilenet_v3_large_fpn':
-            backbone = backbone_factory.get_mobilenet_v3_large_fpn()
-        elif backbone == 'resnet18_fpn':
-            backbone = backbone_factory.get_resnet18_fpn()
-
-        self.__construct__(backbone=backbone, num_classes=num_classes, **kwargs)
-
-
-    def __construct__(
-            self,
-            backbone,
-            num_classes=None,
-            # transform parameters
-            min_size=512,
-            max_size=1333,
-            image_mean=None,
-            image_std=None,
-            # RPN parameters
-            rpn_anchor_generator=None,
-            rpn_head=None,
-            rpn_pre_nms_top_n_train=2000,
-            rpn_pre_nms_top_n_test=1000,
-            rpn_post_nms_top_n_train=2000,
-            rpn_post_nms_top_n_test=1000,
-            rpn_nms_thresh=0.7,
-            rpn_fg_iou_thresh=0.7,
-            rpn_bg_iou_thresh=0.3,
-            rpn_batch_size_per_image=256,
-            rpn_positive_fraction=0.5,
-            rpn_score_thresh=0.0,
-            # Box parameters
-            box_roi_pool=None,
-            box_head=None,
-            box_predictor=None,
-            box_score_thresh=0.05,
-            box_nms_thresh=0.5,
-            box_detections_per_img=100,
-            box_fg_iou_thresh=0.5,
-            box_bg_iou_thresh=0.5,
-            box_batch_size_per_image=512,
-            box_positive_fraction=0.25,
-            bbox_reg_weights=None,
-            # line parameters
-            line_head=None,
-            line_predictor=None,
-            **kwargs,
-    ):
-
-        if not hasattr(backbone, "out_channels"):
-            raise ValueError(
-                "backbone should contain an attribute out_channels "
-                "specifying the number of output channels (assumed to be the "
-                "same for all the levels)"
-            )
-
-        if not isinstance(rpn_anchor_generator, (AnchorGenerator, type(None))):
-            raise TypeError(
-                f"rpn_anchor_generator should be of type AnchorGenerator or None instead of {type(rpn_anchor_generator)}"
-            )
-        if not isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))):
-            raise TypeError(
-                f"box_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(box_roi_pool)}"
-            )
-
-        if num_classes is not None:
-            if box_predictor is not None:
-                raise ValueError("num_classes should be None when box_predictor is specified")
-        else:
-            if box_predictor is None:
-                raise ValueError("num_classes should not be None when box_predictor is not specified")
-
-        out_channels = backbone.out_channels
-
-        if line_head is None:
-            num_class = 5
-            line_head = LineRCNNHeads(out_channels, num_class)
-
-        if line_predictor is None:
-            line_predictor = LineRCNNPredictor(self.cfg)
-
-        if rpn_anchor_generator is None:
-            rpn_anchor_generator = _default_anchorgen()
-        if rpn_head is None:
-            rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
-
-        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
-        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
-
-        rpn = RegionProposalNetwork(
-            rpn_anchor_generator,
-            rpn_head,
-            rpn_fg_iou_thresh,
-            rpn_bg_iou_thresh,
-            rpn_batch_size_per_image,
-            rpn_positive_fraction,
-            rpn_pre_nms_top_n,
-            rpn_post_nms_top_n,
-            rpn_nms_thresh,
-            score_thresh=rpn_score_thresh,
-        )
-
-        if box_roi_pool is None:
-            box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2)
-
-        if box_head is None:
-            resolution = box_roi_pool.output_size[0]
-            representation_size = 1024
-            box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
-
-        if box_predictor is None:
-            representation_size = 1024
-            box_predictor = BoxPredictor(representation_size, num_classes)
-
-        roi_heads = RoIHeads(
-            # Box
-            box_roi_pool,
-            box_head,
-            box_predictor,
-            line_head,
-            line_predictor,
-            box_fg_iou_thresh,
-            box_bg_iou_thresh,
-            box_batch_size_per_image,
-            box_positive_fraction,
-            bbox_reg_weights,
-            box_score_thresh,
-            box_nms_thresh,
-            box_detections_per_img,
-        )
-
-        if image_mean is None:
-            image_mean = [0.485, 0.456, 0.406]
-        if image_std is None:
-            image_std = [0.229, 0.224, 0.225]
-        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std, **kwargs)
-
-        super().__init__(backbone, rpn, roi_heads, transform)
-
-        self.roi_heads = roi_heads
-
-        self.roi_heads.line_head = line_head
-        self.roi_heads.line_predictor = line_predictor
-
-    def train_by_cfg(self, cfg):
-        # cfg = read_yaml(cfg)
-        self.trainer = Trainer()
-        self.trainer.train_from_cfg(model=self, cfg=cfg)
-
-
-
-class TwoMLPHead(nn.Module):
-    """
-    Standard heads for FPN-based models
-
-    Args:
-        in_channels (int): number of input channels
-        representation_size (int): size of the intermediate representation
-    """
-
-    def __init__(self, in_channels, representation_size):
-        super().__init__()
-
-        self.fc6 = nn.Linear(in_channels, representation_size)
-        self.fc7 = nn.Linear(representation_size, representation_size)
-
-    def forward(self, x):
-        x = x.flatten(start_dim=1)
-
-        x = F.relu(self.fc6(x))
-        x = F.relu(self.fc7(x))
-
-        return x
-
-
-class LineNetConvFCHead(nn.Sequential):
-    def __init__(
-            self,
-            input_size: Tuple[int, int, int],
-            conv_layers: List[int],
-            fc_layers: List[int],
-            norm_layer: Optional[Callable[..., nn.Module]] = None,
-    ):
-        """
-        Args:
-            input_size (Tuple[int, int, int]): the input size in CHW format.
-            conv_layers (list): feature dimensions of each Convolution layer
-            fc_layers (list): feature dimensions of each FCN layer
-            norm_layer (callable, optional): Module specifying the normalization layer to use. Default: None
-        """
-        in_channels, in_height, in_width = input_size
-
-        blocks = []
-        previous_channels = in_channels
-        for current_channels in conv_layers:
-            blocks.append(misc_nn_ops.Conv2dNormActivation(previous_channels, current_channels, norm_layer=norm_layer))
-            previous_channels = current_channels
-        blocks.append(nn.Flatten())
-        previous_channels = previous_channels * in_height * in_width
-        for current_channels in fc_layers:
-            blocks.append(nn.Linear(previous_channels, current_channels))
-            blocks.append(nn.ReLU(inplace=True))
-            previous_channels = current_channels
-
-        super().__init__(*blocks)
-        for layer in self.modules():
-            if isinstance(layer, nn.Conv2d):
-                nn.init.kaiming_normal_(layer.weight, mode="fan_out", nonlinearity="relu")
-                if layer.bias is not None:
-                    nn.init.zeros_(layer.bias)
-
-
-class BoxPredictor(nn.Module):
-    """
-    Standard classification + bounding box regression layers
-    for Fast R-CNN.
-
-    Args:
-        in_channels (int): number of input channels
-        num_classes (int): number of output classes (including background)
-    """
-
-    def __init__(self, in_channels, num_classes):
-        super().__init__()
-        self.cls_score = nn.Linear(in_channels, num_classes)
-        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)
-
-    def forward(self, x):
-        if x.dim() == 4:
-            torch._assert(
-                list(x.shape[2:]) == [1, 1],
-                f"x has the wrong shape, expecting the last two dimensions to be [1,1] instead of {list(x.shape[2:])}",
-            )
-        x = x.flatten(start_dim=1)
-        scores = self.cls_score(x)
-        bbox_deltas = self.bbox_pred(x)
-
-        return scores, bbox_deltas
-
-
-_COMMON_META = {
-    "categories": _COCO_CATEGORIES,
-    "min_size": (1, 1),
-}
-
-
-class LineNet_ResNet50_FPN_Weights(WeightsEnum):
-    COCO_V1 = Weights(
-        url="https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth",
-        transforms=ObjectDetection,
-        meta={
-            **_COMMON_META,
-            "num_params": 41755286,
-            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-resnet-50-fpn",
-            "_metrics": {
-                "COCO-val2017": {
-                    "box_map": 37.0,
-                }
-            },
-            "_ops": 134.38,
-            "_file_size": 159.743,
-            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
-        },
-    )
-    DEFAULT = COCO_V1
-
-
-class LineNet_ResNet50_FPN_V2_Weights(WeightsEnum):
-    COCO_V1 = Weights(
-        url="https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth",
-        transforms=ObjectDetection,
-        meta={
-            **_COMMON_META,
-            "num_params": 43712278,
-            "recipe": "https://github.com/pytorch/vision/pull/5763",
-            "_metrics": {
-                "COCO-val2017": {
-                    "box_map": 46.7,
-                }
-            },
-            "_ops": 280.371,
-            "_file_size": 167.104,
-            "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
-        },
-    )
-    DEFAULT = COCO_V1
-
-
-class LineNet_MobileNet_V3_Large_FPN_Weights(WeightsEnum):
-    COCO_V1 = Weights(
-        url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_fpn-fb6a3cc7.pth",
-        transforms=ObjectDetection,
-        meta={
-            **_COMMON_META,
-            "num_params": 19386354,
-            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-fpn",
-            "_metrics": {
-                "COCO-val2017": {
-                    "box_map": 32.8,
-                }
-            },
-            "_ops": 4.494,
-            "_file_size": 74.239,
-            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
-        },
-    )
-    DEFAULT = COCO_V1
-
-
-class LineNet_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
-    COCO_V1 = Weights(
-        url="https://download.pytorch.org/models/fasterrcnn_mobilenet_v3_large_320_fpn-907ea3f9.pth",
-        transforms=ObjectDetection,
-        meta={
-            **_COMMON_META,
-            "num_params": 19386354,
-            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#faster-r-cnn-mobilenetv3-large-320-fpn",
-            "_metrics": {
-                "COCO-val2017": {
-                    "box_map": 22.8,
-                }
-            },
-            "_ops": 0.719,
-            "_file_size": 74.239,
-            "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
-        },
-    )
-    DEFAULT = COCO_V1
-
-
-@register_model()
-@handle_legacy_interface(
-    weights=("pretrained", LineNet_ResNet50_FPN_Weights.COCO_V1),
-    weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
-)
-def linenet_resnet50_fpn(
-        *,
-        weights: Optional[LineNet_ResNet50_FPN_Weights] = None,
-        progress: bool = True,
-        num_classes: Optional[int] = None,
-        weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
-        trainable_backbone_layers: Optional[int] = None,
-        **kwargs: Any,
-) -> LineNet:
-    """
-    Faster R-CNN model with a ResNet-50-FPN backbone from the `Faster R-CNN: Towards Real-Time Object
-    Detection with Region Proposal Networks <https://arxiv.org/abs/1506.01497>`__
-    paper.
-
-    .. betastatus:: detection module
-
-    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
-    image, and should be in ``0-1`` range. Different images can have different sizes.
-
-    The behavior of the model changes depending on if it is in training or evaluation mode.
-
-    During training, the model expects both the input tensors and a targets (list of dictionary),
-    containing:
-
-        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
-          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
-        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
-
-    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
-    losses for both the RPN and the R-CNN.
-
-    During inference, the model requires only the input tensors, and returns the post-processed
-    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
-    follows, where ``N`` is the number of detections:
-
-        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
-          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
-        - labels (``Int64Tensor[N]``): the predicted labels for each detection
-        - scores (``Tensor[N]``): the scores of each detection
-
-    For more details on the output, you may refer to :ref:`instance_seg_output`.
-
-    Faster R-CNN is exportable to ONNX for a fixed batch size with inputs images of fixed size.
-
-    Example::
-
-        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
-        >>> # For training
-        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
-        >>> boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
-        >>> labels = torch.randint(1, 91, (4, 11))
-        >>> images = list(image for image in images)
-        >>> targets = []
-        >>> for i in range(len(images)):
-        >>>     d = {}
-        >>>     d['boxes'] = boxes[i]
-        >>>     d['labels'] = labels[i]
-        >>>     targets.append(d)
-        >>> output = model(images, targets)
-        >>> # For inference
-        >>> model.eval()
-        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
-        >>> predictions = model(x)
-        >>>
-        >>> # optionally, if you want to export the model to ONNX:
-        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)
-
-    Args:
-        weights (:class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights`, optional): The
-            pretrained weights to use. See
-            :class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights` below for
-            more details, and possible values. By default, no pre-trained
-            weights are used.
-        progress (bool, optional): If True, displays a progress bar of the
-            download to stderr. Default is True.
-        num_classes (int, optional): number of output classes of the model (including the background)
-        weights_backbone (:class:`~torchvision.models.ResNet50_Weights`, optional): The
-            pretrained weights for the backbone.
-        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from
-            final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are
-            trainable. If ``None`` is passed (the default) this value is set to 3.
-        **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN``
-            base class. Please refer to the `source code
-            <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/faster_rcnn.py>`_
-            for more details about this class.
-
-    .. autoclass:: torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights
-        :members:
-    """
-    weights = LineNet_ResNet50_FPN_Weights.verify(weights)
-    weights_backbone = ResNet50_Weights.verify(weights_backbone)
-
-    if weights is not None:
-        weights_backbone = None
-        num_classes = _ovewrite_value_param("num_classes", num_classes, len(weights.meta["categories"]))
-    elif num_classes is None:
-        num_classes = 91
-
-    is_trained = weights is not None or weights_backbone is not None
-    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
-    norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
-
-    backbone = resnet50(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
-    backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
-    model = LineNet(backbone, num_classes=num_classes, **kwargs)
-
-    if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
-        if weights == LineNet_ResNet50_FPN_Weights.COCO_V1:
-            overwrite_eps(model, 0.0)
-
-    return model
-
-
-@register_model()
-@handle_legacy_interface(
-    weights=("pretrained", LineNet_ResNet50_FPN_V2_Weights.COCO_V1),
-    weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
-)
-def linenet_resnet50_fpn_v2(
-        *,
-        weights: Optional[LineNet_ResNet50_FPN_V2_Weights] = None,
-        progress: bool = True,
-        num_classes: Optional[int] = None,
-        weights_backbone: Optional[ResNet50_Weights] = None,
-        trainable_backbone_layers: Optional[int] = None,
-        **kwargs: Any,
-) -> LineNet:
-    """
-    Constructs an improved Faster R-CNN model with a ResNet-50-FPN backbone from `Benchmarking Detection
-    Transfer Learning with Vision Transformers <https://arxiv.org/abs/2111.11429>`__ paper.
-
-    .. betastatus:: detection module
-
-    It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
-    :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more
-    details.
-
-    Args:
-        weights (:class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights`, optional): The
-            pretrained weights to use. See
-            :class:`~torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights` below for
-            more details, and possible values. By default, no pre-trained
-            weights are used.
-        progress (bool, optional): If True, displays a progress bar of the
-            download to stderr. Default is True.
-        num_classes (int, optional): number of output classes of the model (including the background)
-        weights_backbone (:class:`~torchvision.models.ResNet50_Weights`, optional): The
-            pretrained weights for the backbone.
-        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from
-            final block. Valid values are between 0 and 5, with 5 meaning all backbone layers are
-            trainable. If ``None`` is passed (the default) this value is set to 3.
-        **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN``
-            base class. Please refer to the `source code
-            <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/faster_rcnn.py>`_
-            for more details about this class.
-
-    .. autoclass:: torchvision.models.detection.FasterRCNN_ResNet50_FPN_V2_Weights
-        :members:
-    """
-    weights = LineNet_ResNet50_FPN_V2_Weights.verify(weights)
-    weights_backbone = ResNet50_Weights.verify(weights_backbone)
-
-    if weights is not None:
-        weights_backbone = None
-        num_classes = _ovewrite_value_param("num_classes", num_classes, len(weights.meta["categories"]))
-    elif num_classes is None:
-        num_classes = 91
-
-    is_trained = weights is not None or weights_backbone is not None
-    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
-
-    backbone = resnet50(weights=weights_backbone, progress=progress)
-    backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers, norm_layer=nn.BatchNorm2d)
-    rpn_anchor_generator = _default_anchorgen()
-    rpn_head = RPNHead(backbone.out_channels, rpn_anchor_generator.num_anchors_per_location()[0], conv_depth=2)
-    box_head = LineNetConvFCHead(
-        (backbone.out_channels, 7, 7), [256, 256, 256, 256], [1024], norm_layer=nn.BatchNorm2d
-    )
-    model = LineNet(
-        backbone,
-        num_classes=num_classes,
-        rpn_anchor_generator=rpn_anchor_generator,
-        rpn_head=rpn_head,
-        box_head=box_head,
-        **kwargs,
-    )
-
-    if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
-
-    return model
-
-
-def _linenet_mobilenet_v3_large_fpn(
-        *,
-        weights: Optional[Union[LineNet_MobileNet_V3_Large_FPN_Weights, LineNet_MobileNet_V3_Large_320_FPN_Weights]],
-        progress: bool,
-        num_classes: Optional[int],
-        weights_backbone: Optional[MobileNet_V3_Large_Weights],
-        trainable_backbone_layers: Optional[int],
-        **kwargs: Any,
-) -> LineNet:
-    if weights is not None:
-        weights_backbone = None
-        num_classes = _ovewrite_value_param("num_classes", num_classes, len(weights.meta["categories"]))
-    elif num_classes is None:
-        num_classes = 91
-
-    is_trained = weights is not None or weights_backbone is not None
-    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 6, 3)
-    norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d
-
-    backbone = mobilenet_v3_large(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
-    backbone = _mobilenet_extractor(backbone, True, trainable_backbone_layers)
-    anchor_sizes = ((32, 64, 128, 256, 512),) * 3
-    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
-    model = LineNet(
-        backbone, num_classes, rpn_anchor_generator=AnchorGenerator(anchor_sizes, aspect_ratios), **kwargs
-    )
-
-    if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
-
-    return model
-
-
-@register_model()
-@handle_legacy_interface(
-    weights=("pretrained", LineNet_MobileNet_V3_Large_320_FPN_Weights.COCO_V1),
-    weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
-)
-def linenet_mobilenet_v3_large_320_fpn(
-        *,
-        weights: Optional[LineNet_MobileNet_V3_Large_320_FPN_Weights] = None,
-        progress: bool = True,
-        num_classes: Optional[int] = None,
-        weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
-        trainable_backbone_layers: Optional[int] = None,
-        **kwargs: Any,
-) -> LineNet:
-    """
-    Low resolution Faster R-CNN model with a MobileNetV3-Large backbone tuned for mobile use cases.
-
-    .. betastatus:: detection module
-
-    It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
-    :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more
-    details.
-
-    Example::
-
-        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_320_fpn(weights=FasterRCNN_MobileNet_V3_Large_320_FPN_Weights.DEFAULT)
-        >>> model.eval()
-        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
-        >>> predictions = model(x)
-
-    Args:
-        weights (:class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_320_FPN_Weights`, optional): The
-            pretrained weights to use. See
-            :class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_320_FPN_Weights` below for
-            more details, and possible values. By default, no pre-trained
-            weights are used.
-        progress (bool, optional): If True, displays a progress bar of the
-            download to stderr. Default is True.
-        num_classes (int, optional): number of output classes of the model (including the background)
-        weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The
-            pretrained weights for the backbone.
-        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from
-            final block. Valid values are between 0 and 6, with 6 meaning all backbone layers are
-            trainable. If ``None`` is passed (the default) this value is set to 3.
-        **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN``
-            base class. Please refer to the `source code
-            <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/faster_rcnn.py>`_
-            for more details about this class.
-
-    .. autoclass:: torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_320_FPN_Weights
-        :members:
-    """
-    weights = LineNet_MobileNet_V3_Large_320_FPN_Weights.verify(weights)
-    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)
-
-    defaults = {
-        "min_size": 320,
-        "max_size": 640,
-        "rpn_pre_nms_top_n_test": 150,
-        "rpn_post_nms_top_n_test": 150,
-        "rpn_score_thresh": 0.05,
-    }
-
-    kwargs = {**defaults, **kwargs}
-    return _linenet_mobilenet_v3_large_fpn(
-        weights=weights,
-        progress=progress,
-        num_classes=num_classes,
-        weights_backbone=weights_backbone,
-        trainable_backbone_layers=trainable_backbone_layers,
-        **kwargs,
-    )
-
-
-@register_model()
-@handle_legacy_interface(
-    weights=("pretrained", LineNet_MobileNet_V3_Large_FPN_Weights.COCO_V1),
-    weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
-)
-def linenet_mobilenet_v3_large_fpn(
-        *,
-        weights: Optional[LineNet_MobileNet_V3_Large_FPN_Weights] = None,
-        progress: bool = True,
-        num_classes: Optional[int] = None,
-        weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
-        trainable_backbone_layers: Optional[int] = None,
-        **kwargs: Any,
-) -> LineNet:
-    """
-    Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone.
-
-    .. betastatus:: detection module
-
-    It works similarly to Faster R-CNN with ResNet-50 FPN backbone. See
-    :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn` for more
-    details.
-
-    Example::
-
-        >>> model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT)
-        >>> model.eval()
-        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
-        >>> predictions = model(x)
-
-    Args:
-        weights (:class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights`, optional): The
-            pretrained weights to use. See
-            :class:`~torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights` below for
-            more details, and possible values. By default, no pre-trained
-            weights are used.
-        progress (bool, optional): If True, displays a progress bar of the
-            download to stderr. Default is True.
-        num_classes (int, optional): number of output classes of the model (including the background)
-        weights_backbone (:class:`~torchvision.models.MobileNet_V3_Large_Weights`, optional): The
-            pretrained weights for the backbone.
-        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from
-            final block. Valid values are between 0 and 6, with 6 meaning all backbone layers are
-            trainable. If ``None`` is passed (the default) this value is set to 3.
-        **kwargs: parameters passed to the ``torchvision.models.detection.faster_rcnn.FasterRCNN``
-            base class. Please refer to the `source code
-            <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/faster_rcnn.py>`_
-            for more details about this class.
-
-    .. autoclass:: torchvision.models.detection.FasterRCNN_MobileNet_V3_Large_FPN_Weights
-        :members:
-    """
-    weights = LineNet_MobileNet_V3_Large_FPN_Weights.verify(weights)
-    weights_backbone = MobileNet_V3_Large_Weights.verify(weights_backbone)
-
-    defaults = {
-        "rpn_score_thresh": 0.05,
-    }
-
-    kwargs = {**defaults, **kwargs}
-    return _linenet_mobilenet_v3_large_fpn(
-        weights=weights,
-        progress=progress,
-        num_classes=num_classes,
-        weights_backbone=weights_backbone,
-        trainable_backbone_layers=trainable_backbone_layers,
-        **kwargs,
-    )
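
A side note on the deleted checkpoint above: `_default_anchorgen` pairs one anchor size per FPN level with three aspect ratios, which is why `RPNHead` can take `num_anchors_per_location()[0]`. A minimal standalone check of that wiring, using only the public torchvision API (a sketch, not code from this repo):

from torchvision.models.detection.anchor_utils import AnchorGenerator

anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
gen = AnchorGenerator(anchor_sizes, aspect_ratios)
# one size x three ratios at every location of each of the five FPN levels
print(gen.num_anchors_per_location())  # [3, 3, 3, 3, 3]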

+ 0 - 0
models/line_detect/.ipynb_checkpoints/untitled-checkpoint.py


+ 12 - 38
models/line_detect/line_predictor.py

@@ -66,32 +66,6 @@ class LineRCNNPredictor(nn.Module):
                 head_size = [[2], [1], [2]],
                  **kwargs):
         super().__init__()
-        # self.backbone = backbone
-        # self.cfg = read_yaml(cfg)
-        # self.cfg = read_yaml(r'./config/wireframe.yaml')
-
-        # print(f'linePredictor cfg:{cfg}')
-        #
-        # self.cfg = cfg
-        # self.n_pts0 = self.cfg['n_pts0']
-        # self.n_pts1 = self.cfg['n_pts1']
-        # self.n_stc_posl = self.cfg['n_stc_posl']
-        # self.dim_loi = self.cfg['dim_loi']
-        # self.use_conv = self.cfg['use_conv']
-        # self.dim_fc = self.cfg['dim_fc']
-        # self.n_out_line = self.cfg['n_out_line']
-        # self.n_out_junc = self.cfg['n_out_junc']
-        # self.loss_weight = self.cfg['loss_weight']
-        # self.n_dyn_junc = self.cfg['n_dyn_junc']
-        # self.eval_junc_thres = self.cfg['eval_junc_thres']
-        # self.n_dyn_posl = self.cfg['n_dyn_posl']
-        # self.n_dyn_negl = self.cfg['n_dyn_negl']
-        # self.n_dyn_othr = self.cfg['n_dyn_othr']
-        # self.use_cood = self.cfg['use_cood']
-        # self.use_slop = self.cfg['use_slop']
-        # self.n_stc_negl = self.cfg['n_stc_negl']
-        # self.head_size = self.cfg['head_size']
-
 
         self.n_pts0 = n_pts0
         self.n_pts1 = n_pts1
@@ -176,14 +150,14 @@ class LineRCNNPredictor(nn.Module):
                 "jtyp": torch.zeros(1, dtype=torch.uint8),
                 "line_pos_idx": torch.zeros(2, 2, dtype=torch.uint8),
                 "line_neg_idx": torch.zeros(2, 2, dtype=torch.uint8),
-                "junc_map": torch.zeros([1, 1, 128, 128]),
-                "junc_offset": torch.zeros([1, 1, 2, 128, 128]),
+                "junc_map": torch.zeros([1, 1, 512, 512]),
+                "junc_offset": torch.zeros([1, 1, 2, 512, 512]),
             }
             wires_targets = [t for b in range(inputs.size(0))]
 
             wires_meta = {
-                "junc_map": torch.zeros([1, 1, 128, 128]),
-                "junc_offset": torch.zeros([1, 1, 2, 128, 128]),
+                "junc_map": torch.zeros([1, 1, 512, 512]),
+                "junc_offset": torch.zeros([1, 1, 2, 512, 512]),
             }
 
         T = wires_meta.copy()
@@ -238,10 +212,10 @@ class LineRCNNPredictor(nn.Module):
             p = p[:, 0:1, :] * self.lambda_ + p[:, 1:2, :] * (1 - self.lambda_) - 0.5
             p = p.reshape(-1, 2)  # [N_LINE x N_POINT, 2_XY]
             px, py = p[:, 0].contiguous(), p[:, 1].contiguous()
-            px0 = px.floor().clamp(min=0, max=127)
-            py0 = py.floor().clamp(min=0, max=127)
-            px1 = (px0 + 1).clamp(min=0, max=127)
-            py1 = (py0 + 1).clamp(min=0, max=127)
+            px0 = px.floor().clamp(min=0, max=511)
+            py0 = py.floor().clamp(min=0, max=511)
+            px1 = (px0 + 1).clamp(min=0, max=511)
+            py1 = (py0 + 1).clamp(min=0, max=511)
             px0l, py0l, px1l, py1l = px0.long(), py0.long(), px1.long(), py1.long()
 
             # xp: [N_LINE, N_CHANNEL, N_POINT]
@@ -305,8 +279,8 @@ class LineRCNNPredictor(nn.Module):
 
             # index: [N_TYPE, K]
             score, index = torch.topk(jmap, k=K)
-            y = (index // 128).float() + torch.gather(joff[:, 0], 1, index) + 0.5
-            x = (index % 128).float() + torch.gather(joff[:, 1], 1, index) + 0.5
+            y = (index // 512).float() + torch.gather(joff[:, 0], 1, index) + 0.5
+            x = (index % 512).float() + torch.gather(joff[:, 1], 1, index) + 0.5
 
             # xy: [N_TYPE, K, 2]
             xy = torch.cat([y[..., None], x[..., None]], dim=-1)
@@ -366,8 +340,8 @@ class LineRCNNPredictor(nn.Module):
             u2v /= torch.sqrt((u2v ** 2).sum(-1, keepdim=True)).clamp(min=1e-6)
             feat = torch.cat(
                 [
-                    xyu / 128 * self.use_cood,
-                    xyv / 128 * self.use_cood,
+                    xyu / 512 * self.use_cood,
+                    xyv / 512 * self.use_cood,
                     u2v * self.use_slop,
                     (u[:, None] > K).float(),
                     (v[:, None] > K).float(),
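
For reference, the `index // 512` / `index % 512` pair in the hunk above decodes top-K indices of the flattened 512x512 junction heatmap back into (row, col) positions, to which the sub-pixel offsets from `joff` are then added. A self-contained sketch of just the decode (shapes are illustrative, not taken from the repo):

import torch

jmap = torch.rand(2, 512 * 512)           # [N_TYPE, H*W] flattened junction scores
score, index = torch.topk(jmap, k=300)    # top-K flat indices per junction type
y = (index // 512).float() + 0.5          # row = flat // W, +0.5 for the pixel centre
x = (index % 512).float() + 0.5           # col = flat % W
xy = torch.stack([y, x], dim=-1)          # [N_TYPE, K, 2], as in the code above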

+ 34 - 8
models/line_detect/roi_heads.py

@@ -29,7 +29,30 @@ def sigmoid_l1_loss(logits, target, offset=0.0, mask=None):
         loss = loss * (mask / w)
 
     return loss.mean(2).mean(1)
+class DiceLoss(nn.Module):
+    def __init__(self, smooth=1.):
+        super(DiceLoss, self).__init__()
+        self.smooth = smooth
 
+    def forward(self, logits, targets):
+        probs = torch.sigmoid(logits)
+        probs = probs.view(-1)
+        targets = targets.view(-1).float()
+
+        intersection = (probs * targets).sum()
+        dice = (2. * intersection + self.smooth) / (probs.sum() + targets.sum() + self.smooth)
+        return 1. - dice
+
+
+
+bce_loss = nn.BCEWithLogitsLoss()
+dice_loss = DiceLoss()
+
+
+def combined_loss(preds, targets, alpha=0.5):
+    bce = bce_loss(preds, targets)
+    d = dice_loss(preds, targets)
+    return alpha * bce + (1 - alpha) * d
 
 ### Compute the multi-head losses
 def line_head_loss(input_dict, outputs, feature, loss_weight, mode_train):
@@ -179,14 +202,17 @@ def wirepoint_head_line_loss(targets, output, x, y, idx, loss_weight):
     lmap = output[offset[0]: offset[1]].squeeze(0)
     joff = output[offset[1]: offset[2]].reshape(n_jtyp, 2, batch, row, col)
     L = OrderedDict()
-    L["junc_map"] = sum(
-        cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
-    ).mean()
-    L["line_map"] = (
-        F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
-            .mean(2)
-            .mean(1)
-    ).mean()
+    # L["junc_map"] = sum(
+    #     cross_entropy_loss(jmap[i], T["junc_map"][i]) for i in range(n_jtyp)
+    # ).mean()
+    # L["line_map"] = (
+    #     F.binary_cross_entropy_with_logits(lmap, T["line_map"], reduction="none")
+    #         .mean(2)
+    #         .mean(1)
+    # ).mean()
+    L["junc_map"] = combined_loss(jmap[:, 1, :, :, :], T["junc_map"])
+
+    L["line_map"] = combined_loss(lmap, T["line_map"])
     L["junc_offset"] = sum(
         sigmoid_l1_loss(joff[i, j], T["junc_offset"][i, j], -0.5, T["junc_map"][i])
         for i in range(n_jtyp)
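
The `combined_loss` added above blends BCE-with-logits and Dice with weight `alpha`; Dice is largely insensitive to the flood of background pixels in sparse junction/line maps, which is presumably the motivation for replacing the plain cross-entropy terms. A quick usage sketch (shapes illustrative; the import path is assumed from the file layout):

import torch
from models.line_detect.roi_heads import combined_loss  # assumed import path

logits = torch.randn(1, 512, 512)                    # raw map prediction (pre-sigmoid)
targets = (torch.rand(1, 512, 512) > 0.99).float()   # sparse binary ground truth
loss = combined_loss(logits, targets, alpha=0.5)     # 0.5 * BCE + 0.5 * Dice
print(loss.item())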

+ 2 - 2
models/line_detect/train.yaml

@@ -1,6 +1,6 @@
 io:
   logdir: train_results
-  datadir: \\192.168.50.222/share/lm/Dataset_all
+  datadir: /data/share/lm/Dataset_all
 #  datadir: D:\python\PycharmProjects\data_20250223\0423_
 #  datadir: I:\datasets\wirenet_1000
 
@@ -10,7 +10,7 @@ io:
 train_params:
   resume_from:
   num_workers: 8
-  batch_size: 4
+  batch_size: 2
   max_epoch: 80000
   optim:
     name: Adam
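
For context, these keys are read through the repo's `read_yaml` helper (imported from `..config.config_tool` in the deleted checkpoint above). A hypothetical stand-in with the same access pattern (the helper body is an assumption, not the repo's implementation):

import yaml

def read_yaml(path):
    # hypothetical stand-in for config_tool.read_yaml; assumes plain YAML
    with open(path) as f:
        return yaml.safe_load(f)

cfg = read_yaml('models/line_detect/train.yaml')
print(cfg['io']['datadir'])               # /data/share/lm/Dataset_all
print(cfg['train_params']['batch_size'])  # 2 after this commit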

+ 1 - 0
models/line_detect/train_demo.py

@@ -12,6 +12,7 @@ if __name__ == '__main__':
     # model = linenet_resnet18_fpn()
     # model=get_line_net_convnext_fpn(num_classes=2).to(device)
     model=linenet_newresnet50fpn()
+    model.load_best_model('train_results/20250622_135121/weights/best_val.pth')
     # trainer = Trainer()
     # trainer.train_cfg(model,cfg='./train.yaml')
     model.start_train(cfg='train.yaml')
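
The added `load_best_model` call warm-starts training from `train_results/20250622_135121/weights/best_val.pth` before `start_train` runs. A sketch of what such a loader is assumed to do, with `model` as built in train_demo.py above (the real method lives in the model base class, which this diff does not show):

import torch

# assumption: the checkpoint is either a bare state_dict or a dict wrapping one
state = torch.load('train_results/20250622_135121/weights/best_val.pth', map_location='cpu')
model.load_state_dict(state.get('model_state_dict', state))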

+ 2 - 1
models/line_detect/trainer.py

@@ -166,7 +166,7 @@ class Trainer(BaseTrainer):
         # plt.imshow(lmap)
         # plt.show()
         H = result[-1]['wires']
-        lines = H["lines"][0].cpu().numpy() / 128 * im.shape[:2]
+        lines = H["lines"][0].cpu().numpy()
         scores = H["score"][0].cpu().numpy()
         for i in range(1, len(lines)):
             if (lines[i] == lines[0]).all():
@@ -313,6 +313,7 @@ class Trainer(BaseTrainer):
                 t_end = time.time()
                 print(f'predict used:{t_end - t_start}')
                 self.writer_predict_result(img=imgs[0], result=result, epoch=epoch)
+                epoch_step += 1
 
         avg_loss = total_loss / len(data_loader)
        print(f'{phase}/loss epoch{epoch}:{avg_loss:.4f}')
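
The first trainer hunk drops the `/ 128 * im.shape[:2]` rescale, meaning `H["lines"]` now arrives already in image coordinates on the 512 grid rather than on the old 128x128 feature grid. The old mapping, for comparison (a sketch; `lines` holds [N, 2, 2] endpoint pairs in (y, x) order and `im` is the HxWxC image, as in the surrounding code):

import numpy as np

lines_128 = np.random.rand(10, 2, 2) * 128            # old: endpoints on the 128x128 grid
im = np.zeros((512, 512, 3), dtype=np.uint8)          # stand-in image
lines_img = lines_128 / 128 * np.array(im.shape[:2])  # stretch each (y, x) to image size
# after this commit the rescale is gone: predictions are already image-space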