| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
from typing import Any, Callable, Dict, List, Optional, Type, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torchvision.models.detection.backbone_utils import BackboneWithFPN
- # ----------------------------
- # Utility functions
- # ----------------------------
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """Build a bias-free 3x3 convolution whose padding equals its dilation.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.
        groups: Number of channel groups.
        dilation: Kernel dilation; also used as the padding on every side.

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    conv_kwargs = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,  # padding tracks dilation, so stride 1 keeps the spatial size
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Build a bias-free 1x1 (pointwise) convolution.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    pointwise = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise
- # ----------------------------
- # Bottleneck block (as provided)
- # ----------------------------
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus a shortcut.

    The block outputs ``planes * expansion`` channels. When ``stride != 1`` the
    3x3 convolution performs the spatial downsampling, and the caller is
    expected to supply a ``downsample`` module so the shortcut matches the
    main path's shape.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Create the three convolution/normalisation pairs of the block.

        Args:
            inplanes: Number of input channels.
            planes: Base channel count; the output has ``planes * expansion``.
            stride: Stride applied by the 3x3 convolution.
            downsample: Optional module applied to the input to form the shortcut.
            groups: Number of groups for the 3x3 convolution.
            base_width: Width per group, relative to the reference value 64.
            dilation: Dilation of the 3x3 convolution.
            norm_layer: Factory for normalisation layers; ``nn.BatchNorm2d`` if None.
        """
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        expanded = planes * self.expansion
        # Channel count of the inner (reduced) 1x1 and 3x3 convolutions.
        width = int(planes * (base_width / 64.0)) * groups

        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = make_norm(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = make_norm(width)
        self.conv3 = conv1x1(width, expanded)
        self.bn3 = make_norm(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Run the bottleneck path and add the (optionally downsampled) input."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
- # ----------------------------
- # ResNet backbone and FPN factories
- # ----------------------------
def _resnet_fpn(layers: List[int], out_channels: int) -> nn.Module:
    """Build a Bottleneck ResNet with the given stage depths and wrap it in an FPN.

    Args:
        layers: Number of Bottleneck blocks in each of the four residual stages.
        out_channels: Channel count of every feature map produced by the FPN.

    Returns:
        A ``BackboneWithFPN`` whose pyramid levels are keyed ``'0'`` .. ``'4'``
        (one per encoder stage of the backbone).
    """
    backbone = ResNet(Bottleneck, layers)
    # Map each backbone stage name to the key under which the FPN exposes it.
    return_layers = {f"encoder{level}": str(level) for level in range(5)}
    # Stem width (64), then planes * Bottleneck.expansion for the four stages.
    in_channels_list = [64, 256, 512, 1024, 2048]
    return BackboneWithFPN(
        backbone,
        return_layers=return_layers,
        in_channels_list=in_channels_list,
        out_channels=out_channels,
    )


def resnet18fpn(out_channels: int = 256):
    """Return an FPN backbone with stage depths ``[2, 2, 2, 2]``.

    NOTE: the stages are built from ``Bottleneck`` blocks, so despite the name
    this is not the BasicBlock-based ResNet-18 layout; only the depths match.
    """
    return _resnet_fpn([2, 2, 2, 2], out_channels)


def resnet50fpn(out_channels: int = 256):
    """Return an FPN backbone with Bottleneck stage depths ``[3, 4, 6, 3]``."""
    return _resnet_fpn([3, 4, 6, 3], out_channels)


def resnet101fpn(out_channels: int = 256):
    """Return an FPN backbone with Bottleneck stage depths ``[3, 4, 23, 3]``."""
    return _resnet_fpn([3, 4, 23, 3], out_channels)


def resnet152fpn(out_channels: int = 256):
    """Return an FPN backbone with Bottleneck stage depths ``[3, 8, 36, 3]``."""
    return _resnet_fpn([3, 8, 36, 3], out_channels)
class ResNet(nn.Module):
    """Five-stage ResNet encoder that returns the feature map of every stage.

    ``encoder0`` is a 3x3 convolution stem followed by a stride-2 max-pool;
    ``encoder1`` .. ``encoder4`` are residual stages that are each built with
    stride 2, so every stage halves the spatial resolution of the previous one.

    NOTE: the ``encoder*`` modules are registered in execution order, and the
    FPN factories in this file refer to them by name. torchvision's
    ``IntermediateLayerGetter`` (used by ``BackboneWithFPN``) relies on child
    modules being registered in the order they are used, so keep that order
    intact when editing this class.
    """

    def __init__(self, block: Type[Bottleneck], layers: List[int]) -> None:
        """Build the stem and the four residual stages.

        Args:
            block: Residual block class used for every stage.
            layers: Number of blocks in each of the four residual stages.
        """
        super().__init__()
        self._norm_layer = nn.BatchNorm2d
        # Running input-channel count; updated by ``_make_layer`` after each stage.
        self.inplanes = 64
        self.dilation = 1
        self.groups = 1
        self.base_width = 64

        self.encoder0 = nn.Sequential(
            nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1, stride=1, bias=False),
            self._norm_layer(self.inplanes),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        )
        self.encoder1 = self._make_layer(block, 64, layers[0], stride=2)
        self.encoder2 = self._make_layer(block, 128, layers[1], stride=2)
        self.encoder3 = self._make_layer(block, 256, layers[2], stride=2)
        self.encoder4 = self._make_layer(block, 512, layers[3], stride=2)

    def _make_layer(
        self,
        block: Type[Bottleneck],
        planes: int,
        blocks: int,
        stride: int = 1,
        dilate: bool = False,
    ) -> nn.Sequential:
        """Build one residual stage and advance ``self.inplanes`` past it.

        Args:
            block: Residual block class to instantiate.
            planes: Base channel count; the stage outputs ``planes * block.expansion``.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block (the remaining blocks use stride 1).
            dilate: If True, fold ``stride`` into the dilation instead of striding.

        Returns:
            The stage as an ``nn.Sequential`` of ``blocks`` blocks.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # Trade the spatial stride for a larger receptive field.
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Project the shortcut so it matches the first block's output shape.
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        # Only the first block strides/projects; it still uses the previous dilation.
        layers = [
            block(
                self.inplanes, planes, stride, downsample, self.groups, self.base_width,
                previous_dilation, norm_layer,
            )
        ]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm_layer=norm_layer,
                )
            )
        return nn.Sequential(*layers)

    def _make_decoder_layer(
        self,
        block: Type[Bottleneck],
        in_channels: int,
        out_channels: int,
        blocks: int = 1,
    ) -> nn.Sequential:
        """Build the residual blocks of a decoder stage.

        Every block maps ``in_channels`` back to ``in_channels`` (its base width
        is ``in_channels // block.expansion``), so no shortcut projection is used.

        Args:
            block: Residual block class to instantiate.
            in_channels: Channel count entering the stage.
            out_channels: Channel count leaving the stage; must equal ``in_channels``.
            blocks: Number of blocks in the stage.

        Returns:
            The stage as an ``nn.Sequential`` of ``blocks`` blocks.

        Raises:
            ValueError: If ``in_channels`` differs from ``out_channels``.
        """
        # Raise explicitly: an ``assert`` would be stripped when running with -O.
        if in_channels != out_channels:
            raise ValueError("in_channels must equal out_channels")
        layers = [
            block(
                in_channels,
                in_channels // block.expansion,
                groups=self.groups,
                base_width=self.base_width,
                dilation=self.dilation,
                norm_layer=self._norm_layer,
            )
            for _ in range(blocks)
        ]
        return nn.Sequential(*layers)

    def _make_upsample_layer(self, in_channels: int, out_channels: int) -> nn.Module:
        """Return a transposed convolution (kernel 2, stride 2) used for upsampling."""
        return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)

    def _forward_impl(self, x: Tensor) -> Dict[str, Tensor]:
        """Run all five stages and collect every intermediate feature map.

        Args:
            x: Input batch; the stem convolution expects 3 input channels.

        Returns:
            Dict mapping ``'encoder0'`` .. ``'encoder4'`` to that stage's output.
        """
        x0 = self.encoder0(x)
        x1 = self.encoder1(x0)
        x2 = self.encoder2(x1)
        x3 = self.encoder3(x2)
        x4 = self.encoder4(x3)
        return {
            'encoder0': x0,
            'encoder1': x1,
            'encoder2': x2,
            'encoder3': x3,
            'encoder4': x4,
        }

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        """Return the per-stage feature maps for ``x`` (see ``_forward_impl``)."""
        return self._forward_impl(x)
- # ----------------------------
- # Test code
- # ----------------------------
if __name__ == "__main__":
    # Smoke test: push one random batch through the FPN model and the raw
    # backbone, then print the resulting feature names and shapes.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    fpn_model = resnet18fpn().to(run_device)
    dummy_input = torch.randn(1, 3, 512, 512).to(run_device)
    pyramid = fpn_model(dummy_input)

    raw_backbone = ResNet(Bottleneck, [3, 4, 6, 3]).to(run_device)
    raw_features = raw_backbone(dummy_input)

    print("Raw backbone output:", list(raw_features.keys()))
    print(f"Input shape: {dummy_input.shape}")
    print(f'feat_names:{list(pyramid.keys())}')
    # Pyramid levels '0'..'4' come from the encoder stages; 'pool' is the extra level.
    for position, level in enumerate(("0", "1", "2", "3", "4", "pool")):
        print(f"Output shape{position}: {pyramid[level].shape}")
|