|  | @@ -1,25 +1,35 @@
 | 
	
		
			
				|  |  |  from typing import Any, Callable, List, Optional, Tuple, Union
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  import torch
 | 
	
		
			
				|  |  | -import torch.nn.functional as F
 | 
	
		
			
				|  |  |  from torch import nn
 | 
	
		
			
				|  |  |  from torchvision.ops import MultiScaleRoIAlign
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -from  libs.vision_libs.ops import misc as misc_nn_ops
 | 
	
		
			
				|  |  | +from libs.vision_libs.models import MobileNet_V3_Large_Weights, mobilenet_v3_large
 | 
	
		
			
				|  |  | +from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
 | 
	
		
			
				|  |  | +from libs.vision_libs.models.detection.rpn import RPNHead, RegionProposalNetwork
 | 
	
		
			
				|  |  | +from libs.vision_libs.models.detection.ssdlite import _mobilenet_extractor
 | 
	
		
			
				|  |  | +from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
 | 
	
		
			
				|  |  | +from libs.vision_libs.ops import misc as misc_nn_ops
 | 
	
		
			
				|  |  |  from libs.vision_libs.transforms._presets import ObjectDetection
 | 
	
		
			
				|  |  | +from .line_head import LineRCNNHeads
 | 
	
		
			
				|  |  | +from .line_predictor import LineRCNNPredictor
 | 
	
		
			
				|  |  |  from libs.vision_libs.models._api import register_model, Weights, WeightsEnum
 | 
	
		
			
				|  |  | -from libs.vision_libs.models._meta import _COCO_CATEGORIES
 | 
	
		
			
				|  |  | +from libs.vision_libs.models._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES, _COCO_CATEGORIES
 | 
	
		
			
				|  |  |  from libs.vision_libs.models._utils import _ovewrite_value_param, handle_legacy_interface
 | 
	
		
			
				|  |  | -from libs.vision_libs.models.mobilenetv3 import mobilenet_v3_large, MobileNet_V3_Large_Weights
 | 
	
		
			
				|  |  |  from libs.vision_libs.models.resnet import resnet50, ResNet50_Weights
 | 
	
		
			
				|  |  |  from libs.vision_libs.models.detection._utils import overwrite_eps
 | 
	
		
			
				|  |  | -from libs.vision_libs.models.detection.anchor_utils import AnchorGenerator
 | 
	
		
			
				|  |  | -from libs.vision_libs.models.detection.backbone_utils import _mobilenet_extractor, _resnet_fpn_extractor, _validate_trainable_layers
 | 
	
		
			
				|  |  | +from libs.vision_libs.models.detection.backbone_utils import _resnet_fpn_extractor, _validate_trainable_layers
 | 
	
		
			
				|  |  | +from libs.vision_libs.models.detection.faster_rcnn import FasterRCNN, TwoMLPHead, FastRCNNPredictor
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -from libs.vision_libs.models.detection.rpn import RegionProposalNetwork, RPNHead
 | 
	
		
			
				|  |  | -from libs.vision_libs.models.detection.transform import GeneralizedRCNNTransform
 | 
	
		
			
				|  |  | +from .roi_heads import RoIHeads
 | 
	
		
			
				|  |  | +from .trainer import Trainer
 | 
	
		
			
				|  |  | +from ..base import backbone_factory
 | 
	
		
			
				|  |  | +from ..base.base_detection_net import BaseDetectionNet
 | 
	
		
			
				|  |  | +import torch.nn.functional as F
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +from ..config.config_tool import read_yaml
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -######## 弃用  ###########
 | 
	
		
			
				|  |  | +FEATURE_DIM = 8
 | 
	
		
			
				|  |  | +device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  __all__ = [
 | 
	
		
			
				|  |  |      "LineNet",
 | 
	
	
		
			
				|  | @@ -28,15 +38,11 @@ __all__ = [
 | 
	
		
			
				|  |  |      "LineNet_MobileNet_V3_Large_FPN_Weights",
 | 
	
		
			
				|  |  |      "LineNet_MobileNet_V3_Large_320_FPN_Weights",
 | 
	
		
			
				|  |  |      "linenet_resnet50_fpn",
 | 
	
		
			
				|  |  | -    "fasterrcnn_resnet50_fpn_v2",
 | 
	
		
			
				|  |  | +    "linenet_resnet50_fpn_v2",
 | 
	
		
			
				|  |  |      "linenet_mobilenet_v3_large_fpn",
 | 
	
		
			
				|  |  |      "linenet_mobilenet_v3_large_320_fpn",
 | 
	
		
			
				|  |  |  ]
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -from .roi_heads import RoIHeads
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -from ..base.base_detection_net import BaseDetectionNet
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def _default_anchorgen():
 | 
	
		
			
				|  |  |      anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
 | 
	
	
		
			
				|  | @@ -45,161 +51,56 @@ def _default_anchorgen():
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  class LineNet(BaseDetectionNet):
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -    Implements Faster R-CNN.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
 | 
	
		
			
				|  |  | -    image, and should be in 0-1 range. Different images can have different sizes.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    The behavior of the model changes depending on if it is in training or evaluation mode.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    During training, the model expects both the input tensors and targets (list of dictionary),
 | 
	
		
			
				|  |  | -    containing:
 | 
	
		
			
				|  |  | -        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
 | 
	
		
			
				|  |  | -          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
 | 
	
		
			
				|  |  | -        - labels (Int64Tensor[N]): the class label for each ground-truth box
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    The model returns a Dict[Tensor] during training, containing the classification and regression
 | 
	
		
			
				|  |  | -    losses for both the RPN and the R-CNN.
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    During inference, the model requires only the input tensors, and returns the post-processed
 | 
	
		
			
				|  |  | -    predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
 | 
	
		
			
				|  |  | -    follows:
 | 
	
		
			
				|  |  | -        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
 | 
	
		
			
				|  |  | -          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
 | 
	
		
			
				|  |  | -        - labels (Int64Tensor[N]): the predicted labels for each image
 | 
	
		
			
				|  |  | -        - scores (Tensor[N]): the scores or each prediction
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    Args:
 | 
	
		
			
				|  |  | -        backbone (nn.Module): the network used to compute the features for the model.
 | 
	
		
			
				|  |  | -            It should contain an out_channels attribute, which indicates the number of output
 | 
	
		
			
				|  |  | -            channels that each feature map has (and it should be the same for all feature maps).
 | 
	
		
			
				|  |  | -            The backbone should return a single Tensor or and OrderedDict[Tensor].
 | 
	
		
			
				|  |  | -        num_classes (int): number of output classes of the model (including the background).
 | 
	
		
			
				|  |  | -            If box_predictor is specified, num_classes should be None.
 | 
	
		
			
				|  |  | -        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
 | 
	
		
			
				|  |  | -        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
 | 
	
		
			
				|  |  | -        image_mean (Tuple[float, float, float]): mean values used for input normalization.
 | 
	
		
			
				|  |  | -            They are generally the mean values of the dataset on which the backbone has been trained
 | 
	
		
			
				|  |  | -            on
 | 
	
		
			
				|  |  | -        image_std (Tuple[float, float, float]): std values used for input normalization.
 | 
	
		
			
				|  |  | -            They are generally the std values of the dataset on which the backbone has been trained on
 | 
	
		
			
				|  |  | -        rpn_anchor_generator (AnchorGenerator): module that generates the anchors for a set of feature
 | 
	
		
			
				|  |  | -            maps.
 | 
	
		
			
				|  |  | -        rpn_head (nn.Module): module that computes the objectness and regression deltas from the RPN
 | 
	
		
			
				|  |  | -        rpn_pre_nms_top_n_train (int): number of proposals to keep before applying NMS during training
 | 
	
		
			
				|  |  | -        rpn_pre_nms_top_n_test (int): number of proposals to keep before applying NMS during testing
 | 
	
		
			
				|  |  | -        rpn_post_nms_top_n_train (int): number of proposals to keep after applying NMS during training
 | 
	
		
			
				|  |  | -        rpn_post_nms_top_n_test (int): number of proposals to keep after applying NMS during testing
 | 
	
		
			
				|  |  | -        rpn_nms_thresh (float): NMS threshold used for postprocessing the RPN proposals
 | 
	
		
			
				|  |  | -        rpn_fg_iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
 | 
	
		
			
				|  |  | -            considered as positive during training of the RPN.
 | 
	
		
			
				|  |  | -        rpn_bg_iou_thresh (float): maximum IoU between the anchor and the GT box so that they can be
 | 
	
		
			
				|  |  | -            considered as negative during training of the RPN.
 | 
	
		
			
				|  |  | -        rpn_batch_size_per_image (int): number of anchors that are sampled during training of the RPN
 | 
	
		
			
				|  |  | -            for computing the loss
 | 
	
		
			
				|  |  | -        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
 | 
	
		
			
				|  |  | -            of the RPN
 | 
	
		
			
				|  |  | -        rpn_score_thresh (float): during inference, only return proposals with a classification score
 | 
	
		
			
				|  |  | -            greater than rpn_score_thresh
 | 
	
		
			
				|  |  | -        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
 | 
	
		
			
				|  |  | -            the locations indicated by the bounding boxes
 | 
	
		
			
				|  |  | -        box_head (nn.Module): module that takes the cropped feature maps as input
 | 
	
		
			
				|  |  | -        box_predictor (nn.Module): module that takes the output of box_head and returns the
 | 
	
		
			
				|  |  | -            classification logits and box regression deltas.
 | 
	
		
			
				|  |  | -        box_score_thresh (float): during inference, only return proposals with a classification score
 | 
	
		
			
				|  |  | -            greater than box_score_thresh
 | 
	
		
			
				|  |  | -        box_nms_thresh (float): NMS threshold for the prediction head. Used during inference
 | 
	
		
			
				|  |  | -        box_detections_per_img (int): maximum number of detections per image, for all classes.
 | 
	
		
			
				|  |  | -        box_fg_iou_thresh (float): minimum IoU between the proposals and the GT box so that they can be
 | 
	
		
			
				|  |  | -            considered as positive during training of the classification head
 | 
	
		
			
				|  |  | -        box_bg_iou_thresh (float): maximum IoU between the proposals and the GT box so that they can be
 | 
	
		
			
				|  |  | -            considered as negative during training of the classification head
 | 
	
		
			
				|  |  | -        box_batch_size_per_image (int): number of proposals that are sampled during training of the
 | 
	
		
			
				|  |  | -            classification head
 | 
	
		
			
				|  |  | -        box_positive_fraction (float): proportion of positive proposals in a mini-batch during training
 | 
	
		
			
				|  |  | -            of the classification head
 | 
	
		
			
				|  |  | -        bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
 | 
	
		
			
				|  |  | -            bounding boxes
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    Example::
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        >>> import torch
 | 
	
		
			
				|  |  | -        >>> import torchvision
 | 
	
		
			
				|  |  | -        >>> from torchvision.models.detection import FasterRCNN
 | 
	
		
			
				|  |  | -        >>> from torchvision.models.detection.rpn import AnchorGenerator
 | 
	
		
			
				|  |  | -        >>> # load a pre-trained model for classification and return
 | 
	
		
			
				|  |  | -        >>> # only the features
 | 
	
		
			
				|  |  | -        >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
 | 
	
		
			
				|  |  | -        >>> # FasterRCNN needs to know the number of
 | 
	
		
			
				|  |  | -        >>> # output channels in a backbone. For mobilenet_v2, it's 1280,
 | 
	
		
			
				|  |  | -        >>> # so we need to add it here
 | 
	
		
			
				|  |  | -        >>> backbone.out_channels = 1280
 | 
	
		
			
				|  |  | -        >>>
 | 
	
		
			
				|  |  | -        >>> # let's make the RPN generate 5 x 3 anchors per spatial
 | 
	
		
			
				|  |  | -        >>> # location, with 5 different sizes and 3 different aspect
 | 
	
		
			
				|  |  | -        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
 | 
	
		
			
				|  |  | -        >>> # map could potentially have different sizes and
 | 
	
		
			
				|  |  | -        >>> # aspect ratios
 | 
	
		
			
				|  |  | -        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
 | 
	
		
			
				|  |  | -        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
 | 
	
		
			
				|  |  | -        >>>
 | 
	
		
			
				|  |  | -        >>> # let's define what are the feature maps that we will
 | 
	
		
			
				|  |  | -        >>> # use to perform the region of interest cropping, as well as
 | 
	
		
			
				|  |  | -        >>> # the size of the crop after rescaling.
 | 
	
		
			
				|  |  | -        >>> # if your backbone returns a Tensor, featmap_names is expected to
 | 
	
		
			
				|  |  | -        >>> # be ['0']. More generally, the backbone should return an
 | 
	
		
			
				|  |  | -        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
 | 
	
		
			
				|  |  | -        >>> # feature maps to use.
 | 
	
		
			
				|  |  | -        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
 | 
	
		
			
				|  |  | -        >>>                                                 output_size=7,
 | 
	
		
			
				|  |  | -        >>>                                                 sampling_ratio=2)
 | 
	
		
			
				|  |  | -        >>>
 | 
	
		
			
				|  |  | -        >>> # put the pieces together inside a FasterRCNN model
 | 
	
		
			
				|  |  | -        >>> model = FasterRCNN(backbone,
 | 
	
		
			
				|  |  | -        >>>                    num_classes=2,
 | 
	
		
			
				|  |  | -        >>>                    rpn_anchor_generator=anchor_generator,
 | 
	
		
			
				|  |  | -        >>>                    box_roi_pool=roi_pooler)
 | 
	
		
			
				|  |  | -        >>> model.eval()
 | 
	
		
			
				|  |  | -        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
 | 
	
		
			
				|  |  | -        >>> predictions = model(x)
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    def __init__(
 | 
	
		
			
				|  |  | -        self,
 | 
	
		
			
				|  |  | -        backbone,
 | 
	
		
			
				|  |  | -        num_classes=None,
 | 
	
		
			
				|  |  | -        # transform parameters
 | 
	
		
			
				|  |  | -        min_size=512,
 | 
	
		
			
				|  |  | -        max_size=1333,
 | 
	
		
			
				|  |  | -        image_mean=None,
 | 
	
		
			
				|  |  | -        image_std=None,
 | 
	
		
			
				|  |  | -        # RPN parameters
 | 
	
		
			
				|  |  | -        rpn_anchor_generator=None,
 | 
	
		
			
				|  |  | -        rpn_head=None,
 | 
	
		
			
				|  |  | -        rpn_pre_nms_top_n_train=2000,
 | 
	
		
			
				|  |  | -        rpn_pre_nms_top_n_test=1000,
 | 
	
		
			
				|  |  | -        rpn_post_nms_top_n_train=2000,
 | 
	
		
			
				|  |  | -        rpn_post_nms_top_n_test=1000,
 | 
	
		
			
				|  |  | -        rpn_nms_thresh=0.7,
 | 
	
		
			
				|  |  | -        rpn_fg_iou_thresh=0.7,
 | 
	
		
			
				|  |  | -        rpn_bg_iou_thresh=0.3,
 | 
	
		
			
				|  |  | -        rpn_batch_size_per_image=256,
 | 
	
		
			
				|  |  | -        rpn_positive_fraction=0.5,
 | 
	
		
			
				|  |  | -        rpn_score_thresh=0.0,
 | 
	
		
			
				|  |  | -        # Box parameters
 | 
	
		
			
				|  |  | -        box_roi_pool=None,
 | 
	
		
			
				|  |  | -        box_head=None,
 | 
	
		
			
				|  |  | -        box_predictor=None,
 | 
	
		
			
				|  |  | -        box_score_thresh=0.05,
 | 
	
		
			
				|  |  | -        box_nms_thresh=0.5,
 | 
	
		
			
				|  |  | -        box_detections_per_img=100,
 | 
	
		
			
				|  |  | -        box_fg_iou_thresh=0.5,
 | 
	
		
			
				|  |  | -        box_bg_iou_thresh=0.5,
 | 
	
		
			
				|  |  | -        box_batch_size_per_image=512,
 | 
	
		
			
				|  |  | -        box_positive_fraction=0.25,
 | 
	
		
			
				|  |  | -        bbox_reg_weights=None,
 | 
	
		
			
				|  |  | -        **kwargs,
 | 
	
		
			
				|  |  | +    def __init__(self, cfg, **kwargs):
 | 
	
		
			
				|  |  | +        cfg = read_yaml(cfg)
 | 
	
		
			
				|  |  | +        self.cfg=cfg
 | 
	
		
			
				|  |  | +        backbone = cfg['backbone']
 | 
	
		
			
				|  |  | +        num_classes = cfg['num_classes']
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        if backbone == 'resnet50_fpn':
 | 
	
		
			
				|  |  | +            backbone=backbone_factory.get_resnet50_fpn()
 | 
	
		
			
				|  |  | +            print(f'out_chanenels:{backbone.out_channels}')
 | 
	
		
			
				|  |  | +            self.__construct__(backbone=backbone, num_classes=num_classes, **kwargs)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def __construct__(
 | 
	
		
			
				|  |  | +            self,
 | 
	
		
			
				|  |  | +            backbone,
 | 
	
		
			
				|  |  | +            num_classes=None,
 | 
	
		
			
				|  |  | +            # transform parameters
 | 
	
		
			
				|  |  | +            min_size=512,
 | 
	
		
			
				|  |  | +            max_size=1333,
 | 
	
		
			
				|  |  | +            image_mean=None,
 | 
	
		
			
				|  |  | +            image_std=None,
 | 
	
		
			
				|  |  | +            # RPN parameters
 | 
	
		
			
				|  |  | +            rpn_anchor_generator=None,
 | 
	
		
			
				|  |  | +            rpn_head=None,
 | 
	
		
			
				|  |  | +            rpn_pre_nms_top_n_train=2000,
 | 
	
		
			
				|  |  | +            rpn_pre_nms_top_n_test=1000,
 | 
	
		
			
				|  |  | +            rpn_post_nms_top_n_train=2000,
 | 
	
		
			
				|  |  | +            rpn_post_nms_top_n_test=1000,
 | 
	
		
			
				|  |  | +            rpn_nms_thresh=0.7,
 | 
	
		
			
				|  |  | +            rpn_fg_iou_thresh=0.7,
 | 
	
		
			
				|  |  | +            rpn_bg_iou_thresh=0.3,
 | 
	
		
			
				|  |  | +            rpn_batch_size_per_image=256,
 | 
	
		
			
				|  |  | +            rpn_positive_fraction=0.5,
 | 
	
		
			
				|  |  | +            rpn_score_thresh=0.0,
 | 
	
		
			
				|  |  | +            # Box parameters
 | 
	
		
			
				|  |  | +            box_roi_pool=None,
 | 
	
		
			
				|  |  | +            box_head=None,
 | 
	
		
			
				|  |  | +            box_predictor=None,
 | 
	
		
			
				|  |  | +            box_score_thresh=0.05,
 | 
	
		
			
				|  |  | +            box_nms_thresh=0.5,
 | 
	
		
			
				|  |  | +            box_detections_per_img=100,
 | 
	
		
			
				|  |  | +            box_fg_iou_thresh=0.5,
 | 
	
		
			
				|  |  | +            box_bg_iou_thresh=0.5,
 | 
	
		
			
				|  |  | +            box_batch_size_per_image=512,
 | 
	
		
			
				|  |  | +            box_positive_fraction=0.25,
 | 
	
		
			
				|  |  | +            bbox_reg_weights=None,
 | 
	
		
			
				|  |  | +            # line parameters
 | 
	
		
			
				|  |  | +            line_head=None,
 | 
	
		
			
				|  |  | +            line_predictor=None,
 | 
	
		
			
				|  |  | +            **kwargs,
 | 
	
		
			
				|  |  |      ):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          if not hasattr(backbone, "out_channels"):
 | 
	
	
		
			
				|  | @@ -227,6 +128,13 @@ class LineNet(BaseDetectionNet):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          out_channels = backbone.out_channels
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +        if line_head is None:
 | 
	
		
			
				|  |  | +            num_class = 5
 | 
	
		
			
				|  |  | +            line_head = LineRCNNHeads(out_channels, num_class)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        if line_predictor is None:
 | 
	
		
			
				|  |  | +            line_predictor = LineRCNNPredictor(self.cfg)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |          if rpn_anchor_generator is None:
 | 
	
		
			
				|  |  |              rpn_anchor_generator = _default_anchorgen()
 | 
	
		
			
				|  |  |          if rpn_head is None:
 | 
	
	
		
			
				|  | @@ -254,7 +162,7 @@ class LineNet(BaseDetectionNet):
 | 
	
		
			
				|  |  |          if box_head is None:
 | 
	
		
			
				|  |  |              resolution = box_roi_pool.output_size[0]
 | 
	
		
			
				|  |  |              representation_size = 1024
 | 
	
		
			
				|  |  | -            box_head = TwoMLPHead(out_channels * resolution**2, representation_size)
 | 
	
		
			
				|  |  | +            box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          if box_predictor is None:
 | 
	
		
			
				|  |  |              representation_size = 1024
 | 
	
	
		
			
				|  | @@ -265,6 +173,8 @@ class LineNet(BaseDetectionNet):
 | 
	
		
			
				|  |  |              box_roi_pool,
 | 
	
		
			
				|  |  |              box_head,
 | 
	
		
			
				|  |  |              box_predictor,
 | 
	
		
			
				|  |  | +            line_head,
 | 
	
		
			
				|  |  | +            line_predictor,
 | 
	
		
			
				|  |  |              box_fg_iou_thresh,
 | 
	
		
			
				|  |  |              box_bg_iou_thresh,
 | 
	
		
			
				|  |  |              box_batch_size_per_image,
 | 
	
	
		
			
				|  | @@ -283,6 +193,17 @@ class LineNet(BaseDetectionNet):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          super().__init__(backbone, rpn, roi_heads, transform)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +        self.roi_heads = roi_heads
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        self.roi_heads.line_head = line_head
 | 
	
		
			
				|  |  | +        self.roi_heads.line_predictor = line_predictor
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    def train_by_cfg(self, cfg):
 | 
	
		
			
				|  |  | +        # cfg = read_yaml(cfg)
 | 
	
		
			
				|  |  | +        self.trainer = Trainer()
 | 
	
		
			
				|  |  | +        self.trainer.train_cfg(model=self,cfg=cfg)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  class TwoMLPHead(nn.Module):
 | 
	
		
			
				|  |  |      """
 | 
	
	
		
			
				|  | @@ -310,11 +231,11 @@ class TwoMLPHead(nn.Module):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  class LineNetConvFCHead(nn.Sequential):
 | 
	
		
			
				|  |  |      def __init__(
 | 
	
		
			
				|  |  | -        self,
 | 
	
		
			
				|  |  | -        input_size: Tuple[int, int, int],
 | 
	
		
			
				|  |  | -        conv_layers: List[int],
 | 
	
		
			
				|  |  | -        fc_layers: List[int],
 | 
	
		
			
				|  |  | -        norm_layer: Optional[Callable[..., nn.Module]] = None,
 | 
	
		
			
				|  |  | +            self,
 | 
	
		
			
				|  |  | +            input_size: Tuple[int, int, int],
 | 
	
		
			
				|  |  | +            conv_layers: List[int],
 | 
	
		
			
				|  |  | +            fc_layers: List[int],
 | 
	
		
			
				|  |  | +            norm_layer: Optional[Callable[..., nn.Module]] = None,
 | 
	
		
			
				|  |  |      ):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          Args:
 | 
	
	
		
			
				|  | @@ -469,13 +390,13 @@ class LineNet_MobileNet_V3_Large_320_FPN_Weights(WeightsEnum):
 | 
	
		
			
				|  |  |      weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
 | 
	
		
			
				|  |  |  )
 | 
	
		
			
				|  |  |  def linenet_resnet50_fpn(
 | 
	
		
			
				|  |  | -    *,
 | 
	
		
			
				|  |  | -    weights: Optional[LineNet_ResNet50_FPN_Weights] = None,
 | 
	
		
			
				|  |  | -    progress: bool = True,
 | 
	
		
			
				|  |  | -    num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | -    weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
 | 
	
		
			
				|  |  | -    trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | -    **kwargs: Any,
 | 
	
		
			
				|  |  | +        *,
 | 
	
		
			
				|  |  | +        weights: Optional[LineNet_ResNet50_FPN_Weights] = None,
 | 
	
		
			
				|  |  | +        progress: bool = True,
 | 
	
		
			
				|  |  | +        num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | +        weights_backbone: Optional[ResNet50_Weights] = ResNet50_Weights.IMAGENET1K_V1,
 | 
	
		
			
				|  |  | +        trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | +        **kwargs: Any,
 | 
	
		
			
				|  |  |  ) -> LineNet:
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      Faster R-CNN model with a ResNet-50-FPN backbone from the `Faster R-CNN: Towards Real-Time Object
 | 
	
	
		
			
				|  | @@ -587,14 +508,14 @@ def linenet_resnet50_fpn(
 | 
	
		
			
				|  |  |      weights=("pretrained", LineNet_ResNet50_FPN_V2_Weights.COCO_V1),
 | 
	
		
			
				|  |  |      weights_backbone=("pretrained_backbone", ResNet50_Weights.IMAGENET1K_V1),
 | 
	
		
			
				|  |  |  )
 | 
	
		
			
				|  |  | -def fasterrcnn_resnet50_fpn_v2(
 | 
	
		
			
				|  |  | -    *,
 | 
	
		
			
				|  |  | -    weights: Optional[LineNet_ResNet50_FPN_V2_Weights] = None,
 | 
	
		
			
				|  |  | -    progress: bool = True,
 | 
	
		
			
				|  |  | -    num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | -    weights_backbone: Optional[ResNet50_Weights] = None,
 | 
	
		
			
				|  |  | -    trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | -    **kwargs: Any,
 | 
	
		
			
				|  |  | +def linenet_resnet50_fpn_v2(
 | 
	
		
			
				|  |  | +        *,
 | 
	
		
			
				|  |  | +        weights: Optional[LineNet_ResNet50_FPN_V2_Weights] = None,
 | 
	
		
			
				|  |  | +        progress: bool = True,
 | 
	
		
			
				|  |  | +        num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | +        weights_backbone: Optional[ResNet50_Weights] = None,
 | 
	
		
			
				|  |  | +        trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | +        **kwargs: Any,
 | 
	
		
			
				|  |  |  ) -> LineNet:
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      Constructs an improved Faster R-CNN model with a ResNet-50-FPN backbone from `Benchmarking Detection
 | 
	
	
		
			
				|  | @@ -663,13 +584,13 @@ def fasterrcnn_resnet50_fpn_v2(
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def _linenet_mobilenet_v3_large_fpn(
 | 
	
		
			
				|  |  | -    *,
 | 
	
		
			
				|  |  | -    weights: Optional[Union[LineNet_MobileNet_V3_Large_FPN_Weights, LineNet_MobileNet_V3_Large_320_FPN_Weights]],
 | 
	
		
			
				|  |  | -    progress: bool,
 | 
	
		
			
				|  |  | -    num_classes: Optional[int],
 | 
	
		
			
				|  |  | -    weights_backbone: Optional[MobileNet_V3_Large_Weights],
 | 
	
		
			
				|  |  | -    trainable_backbone_layers: Optional[int],
 | 
	
		
			
				|  |  | -    **kwargs: Any,
 | 
	
		
			
				|  |  | +        *,
 | 
	
		
			
				|  |  | +        weights: Optional[Union[LineNet_MobileNet_V3_Large_FPN_Weights, LineNet_MobileNet_V3_Large_320_FPN_Weights]],
 | 
	
		
			
				|  |  | +        progress: bool,
 | 
	
		
			
				|  |  | +        num_classes: Optional[int],
 | 
	
		
			
				|  |  | +        weights_backbone: Optional[MobileNet_V3_Large_Weights],
 | 
	
		
			
				|  |  | +        trainable_backbone_layers: Optional[int],
 | 
	
		
			
				|  |  | +        **kwargs: Any,
 | 
	
		
			
				|  |  |  ) -> LineNet:
 | 
	
		
			
				|  |  |      if weights is not None:
 | 
	
		
			
				|  |  |          weights_backbone = None
 | 
	
	
		
			
				|  | @@ -684,14 +605,14 @@ def _linenet_mobilenet_v3_large_fpn(
 | 
	
		
			
				|  |  |      backbone = mobilenet_v3_large(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
 | 
	
		
			
				|  |  |      backbone = _mobilenet_extractor(backbone, True, trainable_backbone_layers)
 | 
	
		
			
				|  |  |      anchor_sizes = (
 | 
	
		
			
				|  |  | -        (
 | 
	
		
			
				|  |  | -            32,
 | 
	
		
			
				|  |  | -            64,
 | 
	
		
			
				|  |  | -            128,
 | 
	
		
			
				|  |  | -            256,
 | 
	
		
			
				|  |  | -            512,
 | 
	
		
			
				|  |  | -        ),
 | 
	
		
			
				|  |  | -    ) * 3
 | 
	
		
			
				|  |  | +                       (
 | 
	
		
			
				|  |  | +                           32,
 | 
	
		
			
				|  |  | +                           64,
 | 
	
		
			
				|  |  | +                           128,
 | 
	
		
			
				|  |  | +                           256,
 | 
	
		
			
				|  |  | +                           512,
 | 
	
		
			
				|  |  | +                       ),
 | 
	
		
			
				|  |  | +                   ) * 3
 | 
	
		
			
				|  |  |      aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
 | 
	
		
			
				|  |  |      model = LineNet(
 | 
	
		
			
				|  |  |          backbone, num_classes, rpn_anchor_generator=AnchorGenerator(anchor_sizes, aspect_ratios), **kwargs
 | 
	
	
		
			
				|  | @@ -709,13 +630,13 @@ def _linenet_mobilenet_v3_large_fpn(
 | 
	
		
			
				|  |  |      weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
 | 
	
		
			
				|  |  |  )
 | 
	
		
			
				|  |  |  def linenet_mobilenet_v3_large_320_fpn(
 | 
	
		
			
				|  |  | -    *,
 | 
	
		
			
				|  |  | -    weights: Optional[LineNet_MobileNet_V3_Large_320_FPN_Weights] = None,
 | 
	
		
			
				|  |  | -    progress: bool = True,
 | 
	
		
			
				|  |  | -    num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | -    weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
 | 
	
		
			
				|  |  | -    trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | -    **kwargs: Any,
 | 
	
		
			
				|  |  | +        *,
 | 
	
		
			
				|  |  | +        weights: Optional[LineNet_MobileNet_V3_Large_320_FPN_Weights] = None,
 | 
	
		
			
				|  |  | +        progress: bool = True,
 | 
	
		
			
				|  |  | +        num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | +        weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
 | 
	
		
			
				|  |  | +        trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | +        **kwargs: Any,
 | 
	
		
			
				|  |  |  ) -> LineNet:
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      Low resolution Faster R-CNN model with a MobileNetV3-Large backbone tuned for mobile use cases.
 | 
	
	
		
			
				|  | @@ -783,13 +704,13 @@ def linenet_mobilenet_v3_large_320_fpn(
 | 
	
		
			
				|  |  |      weights_backbone=("pretrained_backbone", MobileNet_V3_Large_Weights.IMAGENET1K_V1),
 | 
	
		
			
				|  |  |  )
 | 
	
		
			
				|  |  |  def linenet_mobilenet_v3_large_fpn(
 | 
	
		
			
				|  |  | -    *,
 | 
	
		
			
				|  |  | -    weights: Optional[LineNet_MobileNet_V3_Large_FPN_Weights] = None,
 | 
	
		
			
				|  |  | -    progress: bool = True,
 | 
	
		
			
				|  |  | -    num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | -    weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
 | 
	
		
			
				|  |  | -    trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | -    **kwargs: Any,
 | 
	
		
			
				|  |  | +        *,
 | 
	
		
			
				|  |  | +        weights: Optional[LineNet_MobileNet_V3_Large_FPN_Weights] = None,
 | 
	
		
			
				|  |  | +        progress: bool = True,
 | 
	
		
			
				|  |  | +        num_classes: Optional[int] = None,
 | 
	
		
			
				|  |  | +        weights_backbone: Optional[MobileNet_V3_Large_Weights] = MobileNet_V3_Large_Weights.IMAGENET1K_V1,
 | 
	
		
			
				|  |  | +        trainable_backbone_layers: Optional[int] = None,
 | 
	
		
			
				|  |  | +        **kwargs: Any,
 | 
	
		
			
				|  |  |  ) -> LineNet:
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  |      Constructs a high resolution Faster R-CNN model with a MobileNetV3-Large FPN backbone.
 |