# lstlm / lcnn
							import torch
import torch.nn as nn
import torchvision
from typing import Dict, List, Optional, Tuple
import torch.nn.functional as F
from torchvision.ops import MultiScaleRoIAlign
from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor
from torchvision.models.detection.transform import GeneralizedRCNNTransform


def get_model(num_classes):
    """Build a torchvision Faster R-CNN (ResNet-50 FPN) with a new box predictor.

    The detector is created with ``pretrained=True`` and its
    ``roi_heads.box_predictor`` is swapped for a fresh ``FastRCNNPredictor``
    that outputs ``num_classes`` classes.

    Args:
        num_classes: Number of classes the replacement predictor should emit.

    Returns:
        The detection model with the replaced box predictor.
    """
    detection = torchvision.models.detection
    detector = detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Reuse the input width of the existing classification layer so the new
    # predictor plugs into the unchanged box head.
    roi_heads = detector.roi_heads
    feature_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
        feature_dim, num_classes
    )
    return detector


def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """
    Compute the classification and box-regression losses of the box head.

    Args:
        class_logits (Tensor): 2-D tensor of per-proposal class scores.
        box_regression (Tensor): per-proposal box outputs whose last dimension
            holds 4 values per class; reshaped internally to (N, classes, 4).
        labels (list[Tensor]): per-image class indices, concatenated along
            dim 0 before use. Entries greater than 0 are treated as positives.
        regression_targets (list[Tensor]): per-image regression targets,
            concatenated along dim 0 before use.

    Returns:
        classification_loss (Tensor): cross-entropy over all proposals.
        box_loss (Tensor): summed smooth-L1 loss over the positive proposals,
            divided by the total number of labels.
    """
    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)

    cls_loss = F.cross_entropy(class_logits, flat_labels)

    # Only proposals with a non-zero label contribute to the box loss; for
    # each of them pick the 4 outputs belonging to its own class.
    positive_idx = torch.where(flat_labels > 0)[0]
    positive_cls = flat_labels[positive_idx]

    num_rows, _ = class_logits.shape
    per_class_boxes = box_regression.reshape(
        num_rows, box_regression.size(-1) // 4, 4
    )

    predicted = per_class_boxes[positive_idx, positive_cls]
    expected = flat_targets[positive_idx]
    summed_loss = F.smooth_l1_loss(
        predicted,
        expected,
        beta=1 / 9,
        reduction="sum",
    )

    # Normalise by every sampled proposal, not just the positives.
    return cls_loss, summed_loss / flat_labels.numel()


class Fasterrcnn_resnet50(nn.Module):
    """Faster R-CNN (ResNet-50 FPN) detector with extra convolutional score heads.

    The module wraps the detector returned by ``get_model`` and adds
    ``num_stacks`` small convolutional heads that are applied to the backbone
    feature map stored under key ``'0'``.

    ``box_roi_pool``, ``box_head`` and ``box_predictor`` are constructed here
    but are not referenced by ``forward``; they are kept so the set of
    registered parameters (and therefore ``state_dict`` keys) is unchanged.

    Args:
        num_classes: Number of classes for the detector's box predictor, the
            standalone ``box_predictor`` and the output channels of every
            score head.
        num_stacks: Number of score heads to create.
    """

    def __init__(self, num_classes=5, num_stacks=1):
        super().__init__()

        # Pass the requested class count through; it used to be hard-coded to
        # 5, which left the detector out of sync with the other heads whenever
        # a different ``num_classes`` was requested.
        self.model = get_model(num_classes=num_classes)
        self.backbone = self.model.backbone

        self.box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=16, sampling_ratio=2)

        out_channels = self.backbone.out_channels
        resolution = self.box_roi_pool.output_size[0]
        representation_size = 1024
        self.box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)

        self.box_predictor = FastRCNNPredictor(representation_size, num_classes)

        # Multi-task output layers: one small conv head per stack, fed with the
        # backbone feature map (hence ``out_channels`` input channels).
        self.score_layers = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(out_channels, 128, kernel_size=3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(inplace=True),
                nn.Conv2d(128, num_classes, kernel_size=1)
            )
            for _ in range(num_stacks)
        ])

        # Input normalisation/resizing for the score branch. Built once here
        # instead of on every forward call; it holds no parameters or buffers,
        # so checkpoints are unaffected.
        self.input_transform = GeneralizedRCNNTransform(
            min_size=512,
            max_size=1333,
            image_mean=[0.485, 0.456, 0.406],
            image_std=[0.229, 0.224, 0.225],
        )

    def forward(self, x, target1, train_or_val, image_shapes=(512, 512)):
        """Run the score heads and the wrapped detector on the same inputs.

        Args:
            x: Images, passed unchanged to both ``GeneralizedRCNNTransform``
                and the wrapped detector (presumably a list of image tensors,
                as torchvision detection models expect -- confirm with caller).
            target1: Targets passed alongside ``x`` to the transform and to
                the detector.
            train_or_val: Kept for interface compatibility. Both values led to
                the same detector call, so it has no effect; what the detector
                returns is governed by the module's train/eval mode (per
                torchvision's detection API).
            image_shapes: Unused; kept for interface compatibility.

        Returns:
            A tuple ``(outputs, feature_, detector_out)`` where ``outputs`` is
            the list of score-head outputs, ``feature_`` is the backbone
            feature map under key ``'0'`` and ``detector_out`` is whatever the
            wrapped detector returns for ``(x, target1)``.
        """
        images, _ = self.input_transform(x, target1)
        pyramid = self.backbone(images.tensors)  # keys: '0' '1' '2' '3' 'pool'

        feature_ = pyramid['0']  # image feature map fed to the score heads
        outputs = [score_layer(feature_) for score_layer in self.score_layers]

        # NOTE: the detector applies its own transform and backbone pass to the
        # raw inputs, so the backbone runs twice per call.
        detector_out = self.model(x, target1)
        return outputs, feature_, detector_out


def fasterrcnn_resnet50(**kwargs):
    """Create a ``Fasterrcnn_resnet50`` from keyword options.

    Only ``num_classes`` (default 5) and ``num_stacks`` (default 1) are read
    from ``kwargs``; any other keys are ignored.

    Returns:
        The constructed ``Fasterrcnn_resnet50`` instance.
    """
    class_count = kwargs.get("num_classes", 5)
    stack_count = kwargs.get("num_stacks", 1)
    return Fasterrcnn_resnet50(num_classes=class_count, num_stacks=stack_count)