import torch
import torch.nn as nn
import torchvision
from typing import Dict, List, Optional, Tuple
import torch.nn.functional as F
from torchvision.ops import MultiScaleRoIAlign
from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor
from torchvision.models.detection.transform import GeneralizedRCNNTransform


def get_model(num_classes):
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a fresh box predictor.

    Args:
        num_classes: Number of output classes for the replacement
            ``FastRCNNPredictor``.

    Returns:
        The torchvision detection model whose ``roi_heads.box_predictor`` has
        been replaced by a new predictor sized for ``num_classes``.
    """
    # Load the detector with pretrained weights.
    # NOTE(review): newer torchvision releases prefer the ``weights=`` argument
    # over ``pretrained=True``; kept as-is for compatibility with the version
    # this file was written against.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Input feature size of the existing classification layer.
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # Swap in a predictor sized for the requested number of classes.
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


def fastrcnn_loss(
    class_logits: torch.Tensor,
    box_regression: torch.Tensor,
    labels: List[torch.Tensor],
    regression_targets: List[torch.Tensor],
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Compute the classification and box-regression losses for Faster R-CNN.

    Args:
        class_logits: 2-D tensor of per-proposal class scores, ``(N, C)``.
        box_regression: Per-proposal box deltas; the last dimension is split
            into groups of 4 (one group per class).
        labels: Per-image label tensors; concatenated along dim 0.
        regression_targets: Per-image regression target tensors; concatenated
            along dim 0.

    Returns:
        ``(classification_loss, box_loss)``. ``box_loss`` is the summed
        smooth-L1 loss over positive proposals (label > 0) divided by the
        total number of labels.
    """
    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    classification_loss = F.cross_entropy(class_logits, labels)

    # Indices of positive proposals and their ground-truth classes, used with
    # advanced indexing to pick the regression output of the matching class.
    sampled_pos_inds_subset = torch.where(labels > 0)[0]
    labels_pos = labels[sampled_pos_inds_subset]

    # Unpacking (rather than ``shape[0]``) keeps the implicit 2-D check.
    num_rois, _ = class_logits.shape
    box_regression = box_regression.reshape(num_rois, box_regression.size(-1) // 4, 4)

    box_loss = F.smooth_l1_loss(
        box_regression[sampled_pos_inds_subset, labels_pos],
        regression_targets[sampled_pos_inds_subset],
        beta=1 / 9,
        reduction="sum",
    )
    # Normalised by all sampled labels, not only the positive ones.
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss


class Fasterrcnn_resnet50(nn.Module):
    """Faster R-CNN (ResNet-50 FPN) wrapper with extra dense score heads.

    ``forward`` returns the per-head score maps computed on FPN level ``'0'``,
    that feature map itself, and the output of the wrapped torchvision
    detector.
    """

    def __init__(self, num_classes=5, num_stacks=1):
        """Create the detector and the auxiliary heads.

        Args:
            num_classes: Number of classes for the detector's box predictor,
                the standalone ``box_predictor`` and every score head.
            num_stacks: Number of parallel score heads.
        """
        super(Fasterrcnn_resnet50, self).__init__()
        # Honour the caller's ``num_classes`` so the wrapped detector agrees
        # with the other heads (a fixed value of 5 would silently mismatch).
        self.model = get_model(num_classes=num_classes)
        # Same module object as ``self.model.backbone`` (weights are shared).
        self.backbone = self.model.backbone

        # NOTE: ``box_roi_pool``, ``box_head`` and ``box_predictor`` are not
        # referenced by ``forward`` in this file; they are kept so the module's
        # attributes and state_dict keys stay unchanged.
        self.box_roi_pool = MultiScaleRoIAlign(
            featmap_names=["0", "1", "2", "3"], output_size=16, sampling_ratio=2
        )
        out_channels = self.backbone.out_channels
        resolution = self.box_roi_pool.output_size[0]
        representation_size = 1024
        self.box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
        self.box_predictor = FastRCNNPredictor(representation_size, num_classes)

        # Multi-task output layers: one small conv head per stack, applied to
        # a backbone feature map (hence ``out_channels`` input channels).
        self.score_layers = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(out_channels, 128, kernel_size=3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(inplace=True),
                nn.Conv2d(128, num_classes, kernel_size=1),
            )
            for _ in range(num_stacks)
        ])

    def forward(self, x, target1, train_or_val, image_shapes=(512, 512)):
        """Run the score heads and the wrapped detector.

        Args:
            x: Input images, passed both to ``GeneralizedRCNNTransform`` and to
                the wrapped detector (presumably a list of 3-D image tensors,
                as torchvision detection models expect).
            target1: Detection targets forwarded alongside ``x``.
            train_or_val: Kept for interface compatibility. The wrapped
                detector is called identically whatever its value; whether it
                yields losses or detections follows the module's own
                train/eval mode.
            image_shapes: Unused; kept for interface compatibility.

        Returns:
            ``(outputs, feature_, detector_output)`` where ``outputs`` is a
            list with one score map per head, ``feature_`` is the FPN level
            ``'0'`` feature map and ``detector_output`` is whatever
            ``self.model(x, target1)`` returns.
        """
        # Normalise/resize/batch the images for the backbone pass below. The
        # transformed targets are not needed here.
        transform = GeneralizedRCNNTransform(
            min_size=512,
            max_size=1333,
            image_mean=[0.485, 0.456, 0.406],
            image_std=[0.229, 0.224, 0.225],
        )
        images, _ = transform(x, target1)

        # Backbone output is keyed by FPN level: '0', '1', '2', '3', 'pool'.
        x_ = self.backbone(images.tensors)
        feature_ = x_['0']  # image features (first FPN level)

        # One score map per head.
        outputs = [score_layer(feature_) for score_layer in self.score_layers]

        # The detector receives the raw inputs and runs its own pipeline, so
        # the shared backbone is evaluated a second time inside this call.
        detector_output = self.model(x, target1)
        return outputs, feature_, detector_output


def fasterrcnn_resnet50(**kwargs):
    """Factory for ``Fasterrcnn_resnet50``.

    Recognised keyword arguments are ``num_classes`` (default 5) and
    ``num_stacks`` (default 1); any others are ignored.
    """
    return Fasterrcnn_resnet50(
        num_classes=kwargs.get("num_classes", 5),
        num_stacks=kwargs.get("num_stacks", 1),
    )