123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120 |
- import torch
- import torch.nn as nn
- import torchvision
- from typing import Dict, List, Optional, Tuple
- import torch.nn.functional as F
- from torchvision.ops import MultiScaleRoIAlign
- from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor
- from torchvision.models.detection.transform import GeneralizedRCNNTransform
def get_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a replaced box predictor.

    Args:
        num_classes: Number of outputs of the replacement box predictor.

    Returns:
        The torchvision Faster R-CNN model, loaded with ``pretrained=True``,
        whose ``roi_heads.box_predictor`` is a freshly constructed
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    detection = torchvision.models.detection

    # Start from the detector that ships with pretrained weights.
    detector = detection.fasterrcnn_resnet50_fpn(pretrained=True)
    roi_heads = detector.roi_heads

    # The replacement predictor must accept the same feature width as the
    # classifier layer it takes the place of.
    feature_width = roi_heads.box_predictor.cls_score.in_features

    # Swap in a predictor head sized for the requested number of classes.
    roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
        feature_width, num_classes
    )
    return detector
def fastrcnn_loss(class_logits, box_regression, labels, regression_targets):
    # type: (Tensor, Tensor, List[Tensor], List[Tensor]) -> Tuple[Tensor, Tensor]
    """Compute the classification and box-regression losses of the box head.

    Args:
        class_logits (Tensor): 2-D tensor of per-sample class scores, shape
            ``(N, num_classes)``.
        box_regression (Tensor): Per-sample box predictions; reshaped
            internally to ``(N, box_regression.size(-1) // 4, 4)`` so that one
            4-vector can be selected per class.
        labels (list[Tensor]): Per-image label tensors; concatenated along
            dim 0. Entries greater than 0 are treated as positive samples.
        regression_targets (list[Tensor]): Per-image regression targets;
            concatenated along dim 0.

    Returns:
        Tuple ``(classification_loss, box_loss)``. The classification loss is
        the cross-entropy over all samples. The box loss is a summed
        smooth-L1 loss over positive samples only, divided by the total
        number of labels (positive and non-positive alike).
    """
    all_labels = torch.cat(labels, dim=0)
    all_targets = torch.cat(regression_targets, dim=0)

    classification_loss = F.cross_entropy(class_logits, all_labels)

    # Only samples with a label > 0 contribute to the box loss; for each one,
    # pick the predicted 4-vector belonging to its own label via advanced
    # indexing.
    positive_idx = torch.where(all_labels > 0)[0]
    positive_classes = all_labels[positive_idx]

    # Unpacking requires class_logits to be exactly 2-D.
    num_samples, _ = class_logits.shape
    per_class_boxes = box_regression.reshape(
        num_samples, box_regression.size(-1) // 4, 4
    )
    predicted = per_class_boxes[positive_idx, positive_classes]
    expected = all_targets[positive_idx]

    summed_loss = F.smooth_l1_loss(
        predicted,
        expected,
        beta=1 / 9,
        reduction="sum",
    )
    # Normalise by every sampled label, not just the positive ones.
    box_loss = summed_loss / all_labels.numel()

    return classification_loss, box_loss
class Fasterrcnn_resnet50(nn.Module):
    """Faster R-CNN (ResNet-50 FPN) detector with extra convolutional score heads.

    The module wraps the detector returned by ``get_model`` and adds
    ``num_stacks`` small convolutional heads that run on the backbone's
    ``'0'`` feature map.

    Args:
        num_classes: Number of classes used for the detector's box predictor,
            for ``box_predictor`` and for the output channels of every score
            head.
        num_stacks: Number of parallel score heads in ``score_layers``.
    """

    def __init__(self, num_classes=5, num_stacks=1):
        super().__init__()

        # Forward the requested class count to the detector. It used to be
        # hard-coded to 5, which silently ignored the ``num_classes`` argument.
        self.model = get_model(num_classes=num_classes)
        self.backbone = self.model.backbone

        # NOTE(review): box_roi_pool, box_head and box_predictor are not used
        # by forward(); they are kept so existing attribute access and
        # checkpoint (state_dict) keys stay compatible.
        self.box_roi_pool = MultiScaleRoIAlign(
            featmap_names=["0", "1", "2", "3"], output_size=16, sampling_ratio=2
        )
        out_channels = self.backbone.out_channels
        resolution = self.box_roi_pool.output_size[0]
        representation_size = 1024
        self.box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
        self.box_predictor = FastRCNNPredictor(representation_size, num_classes)

        # Multi-task output heads. They consume the backbone's '0' feature
        # map, so their input width follows the backbone's channel count.
        self.score_layers = nn.ModuleList([
            nn.Sequential(
                nn.Conv2d(out_channels, 128, kernel_size=3, padding=1),
                nn.BatchNorm2d(128),
                nn.ReLU(inplace=True),
                nn.Conv2d(128, num_classes, kernel_size=1),
            )
            for _ in range(num_stacks)
        ])

        # Pre-processing for the score-head path. Built once here instead of
        # on every forward() call; it holds no parameters or buffers, so the
        # state_dict is unchanged.
        self.transform = GeneralizedRCNNTransform(
            min_size=512,
            max_size=1333,
            image_mean=[0.485, 0.456, 0.406],
            image_std=[0.229, 0.224, 0.225],
        )

    def forward(self, x, target1, train_or_val, image_shapes=(512, 512)):
        """Run the score heads and the wrapped detector on the same inputs.

        Args:
            x: Images, passed both to the pre-processing transform and to the
                wrapped detector.
            target1: Targets, passed alongside ``x`` to the transform and to
                the wrapped detector.
            train_or_val: Accepted for backward compatibility. Both the
                ``"training"`` path and the other path always ran the same
                detector call, so the value does not affect the result. What
                the detector returns is governed by its own train/eval mode
                (torchvision detectors return losses in training mode and
                detections in eval mode).
            image_shapes: Unused; kept for backward compatibility.

        Returns:
            Tuple ``(outputs, feature_, detector_out)`` where ``outputs`` is
            the list of score-head results (one per head), ``feature_`` is the
            backbone's ``'0'`` feature map, and ``detector_out`` is whatever
            ``self.model(x, target1)`` returns.
        """
        images, _ = self.transform(x, target1)

        # The backbone yields a mapping of feature maps; only level '0'
        # (the image features) feeds the score heads.
        feature_maps = self.backbone(images.tensors)
        feature_ = feature_maps['0']

        # One output per head (multi-head).
        outputs = [score_layer(feature_) for score_layer in self.score_layers]

        # NOTE(review): the detector applies its own transform and runs the
        # backbone again on the raw inputs, so the backbone is evaluated twice
        # per call.
        detector_out = self.model(x, target1)
        return outputs, feature_, detector_out
def fasterrcnn_resnet50(**kwargs):
    """Create a ``Fasterrcnn_resnet50`` from optional keyword settings.

    Recognised keys are ``num_classes`` (default 5) and ``num_stacks``
    (default 1); any other keyword arguments are ignored.

    Returns:
        The constructed ``Fasterrcnn_resnet50`` instance.
    """
    defaults = {"num_classes": 5, "num_stacks": 1}
    # Take each recognised option from kwargs, falling back to its default.
    settings = {
        option: kwargs.get(option, fallback)
        for option, fallback in defaults.items()
    }
    return Fasterrcnn_resnet50(**settings)
|