high_reso_resnet.py

import torch
import torch.nn as nn
from torch import Tensor
from typing import Callable, Dict, List, Optional, Type

from torchvision.models.detection.backbone_utils import BackboneWithFPN


# ----------------------------
# Utility functions
# ----------------------------
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """3x3 convolution with padding."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
    )


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """1x1 convolution."""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
# ----------------------------
# Bottleneck block
# ----------------------------
class Bottleneck(nn.Module):
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.0)) * groups
        # Both self.conv2 and self.downsample downsample the input when stride != 1.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out
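
# A quick standalone check of the block (hypothetical usage, not part of the model):
#
#   blk = Bottleneck(64, 64, stride=2,
#                    downsample=nn.Sequential(conv1x1(64, 256, 2), nn.BatchNorm2d(256)))
#   y = blk(torch.randn(1, 64, 56, 56))   # -> torch.Size([1, 256, 28, 28])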
# ----------------------------
# FPN backbone factories
# ----------------------------
def resnet18fpn(out_channels: int = 256) -> BackboneWithFPN:
    # Note: built from Bottleneck blocks ([2, 2, 2]), so the channel widths follow
    # the 4x-expansion layout rather than the standard BasicBlock ResNet-18.
    backbone = ResNet(Bottleneck, [2, 2, 2])
    return_layers = {
        'encoder0': '0',
        'encoder1': '1',
        'encoder2': '2',
        'encoder3': '3',
        # 'encoder4': '5'
    }
    # in_channels_list = [64, 256, 512, 1024, 2048]  # with encoder4 enabled
    in_channels_list = [64, 256, 512, 1024]
    return BackboneWithFPN(
        backbone,
        return_layers=return_layers,
        in_channels_list=in_channels_list,
        out_channels=out_channels,
    )


def resnet50fpn(out_channels: int = 256) -> BackboneWithFPN:
    backbone = ResNet(Bottleneck, [3, 4, 6])
    return_layers = {
        'encoder0': '0',
        'encoder1': '1',
        'encoder2': '2',
        'encoder3': '3',
        # 'encoder4': '5'
    }
    # in_channels_list = [64, 256, 512, 1024, 2048]  # with encoder4 enabled
    in_channels_list = [64, 256, 512, 1024]
    return BackboneWithFPN(
        backbone,
        return_layers=return_layers,
        in_channels_list=in_channels_list,
        out_channels=out_channels,
    )
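
# A minimal sketch of plugging the FPN backbone into a torchvision detector;
# FasterRCNN reads the `out_channels` attribute that BackboneWithFPN sets, and its
# default anchor generator expects five feature maps ('0'..'3' plus 'pool').
# num_classes=2 is an arbitrary example value:
#
#   from torchvision.models.detection import FasterRCNN
#   detector = FasterRCNN(resnet50fpn(), num_classes=2)
#   detector.eval()
#   predictions = detector([torch.randn(3, 512, 512)])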
# ----------------------------
# ResNet main class
# ----------------------------
class ResNet(nn.Module):
    def __init__(self, block: Type[Bottleneck], layers: List[int]) -> None:
        super().__init__()
        self._norm_layer = nn.BatchNorm2d
        self.inplanes = 64
        self.dilation = 1
        self.groups = 1
        self.base_width = 64
        # Stride-1 stem: unlike the standard ResNet stem (7x7 stride-2 conv plus
        # stride-2 pool), this keeps the full input resolution (the "high-reso" design).
        self.encoder0 = nn.Sequential(
            nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1, bias=False),
            self._norm_layer(self.inplanes),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        )
        self.encoder1 = self._make_layer(block, 64, layers[0], stride=2)
        self.encoder2 = self._make_layer(block, 128, layers[1], stride=2)
        self.encoder3 = self._make_layer(block, 256, layers[2], stride=2)
        # self.encoder4 = self._make_layer(block, 512, 3, stride=2)
        # self.encoder5 = self._make_layer(block, 512, 3, stride=2)
        # self.body = nn.ModuleDict({
        #     'encoder0': self.encoder0,
        #     'encoder1': self.encoder1,
        #     'encoder2': self.encoder2,
        #     'encoder3': self.encoder3,
        #     'encoder4': self.encoder4,
        # })
        # self.fpn = self.get_convnext_fpn(
        #     backbone=self.body,
        #     trainable_layers=5,
        #     returned_layers=[0, 1, 2, 3, 4],
        #     extra_blocks=None,
        #     norm_layer=None,
        # )
    def _make_layer(self, block: Type[Bottleneck], planes: int, blocks: int,
                    stride: int = 1, dilate: bool = False) -> nn.Sequential:
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )
        layers = []
        layers.append(
            block(
                self.inplanes, planes, stride, downsample, self.groups, self.base_width,
                previous_dilation, norm_layer,
            )
        )
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm_layer=norm_layer,
                )
            )
        return nn.Sequential(*layers)
    def _make_decoder_layer(self, block: Type[Bottleneck], in_channels: int,
                            out_channels: int, blocks: int = 1) -> nn.Sequential:
        """Build the residual blocks for the decoder."""
        assert in_channels == out_channels, "in_channels must equal out_channels"
        layers = []
        for _ in range(blocks):
            layers.append(
                block(
                    in_channels,
                    in_channels // block.expansion,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm_layer=self._norm_layer,
                )
            )
        return nn.Sequential(*layers)

    def _make_upsample_layer(self, in_channels: int, out_channels: int) -> nn.Module:
        """Upsample with a transposed convolution."""
        return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
    def _forward_impl(self, x: Tensor) -> Dict[str, Tensor]:
        # out = self.fpn(x)
        x0 = self.encoder0(x)
        print(f'x0: {x0.shape}')  # debug: intermediate feature shapes
        x1 = self.encoder1(x0)
        print(f'x1: {x1.shape}')
        x2 = self.encoder2(x1)
        print(f'x2: {x2.shape}')
        x3 = self.encoder3(x2)
        print(f'x3: {x3.shape}')
        # x4 = self.encoder4(x3)
        # print(f'x4: {x4.shape}')
        out = {
            'encoder0': x0,
            'encoder1': x1,
            'encoder2': x2,
            'encoder3': x3,
            # 'encoder4': x4,
        }
        return out

    def forward(self, x: Tensor) -> Dict[str, Tensor]:
        return self._forward_impl(x)
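
# The decoder helpers above (_make_decoder_layer, _make_upsample_layer) are not
# wired into forward(); a minimal sketch of how one decoder stage might compose
# them (hypothetical usage, not part of the original model):
#
#   net = ResNet(Bottleneck, [3, 4, 6])
#   up = net._make_upsample_layer(1024, 1024)                # 2x spatial upsample
#   refine = net._make_decoder_layer(Bottleneck, 1024, 1024)
#   y = refine(up(x3))   # x3: [N, 1024, H, W] -> y: [N, 1024, 2H, 2W]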
# ----------------------------
# Test code
# ----------------------------
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = resnet50fpn().to(device)
    input_tensor = torch.randn(1, 3, 512, 512).to(device)
    output_tensor = model(input_tensor)

    # The raw backbone (no FPN) returns the encoder feature dict directly.
    backbone = ResNet(Bottleneck, [3, 4, 6]).to(device)
    features = backbone(input_tensor)
    print("Raw backbone output:", list(features.keys()))

    print(f"Input shape: {input_tensor.shape}")
    print(f"feat_names: {list(output_tensor.keys())}")
    print(f"Output shape 0: {output_tensor['0'].shape}")
    print(f"Output shape 1: {output_tensor['1'].shape}")
    print(f"Output shape 2: {output_tensor['2'].shape}")
    print(f"Output shape 3: {output_tensor['3'].shape}")
    # print(f"Output shape 4: {output_tensor['5'].shape}")
    print(f"Output shape pool: {output_tensor['pool'].shape}")