# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
"""Convolution modules."""

import math

import numpy as np
import torch
import torch.nn as nn

__all__ = (
    "Conv",
    "Conv2",
    "LightConv",
    "DWConv",
    "DWConvTranspose2d",
    "ConvTranspose",
    "Focus",
    "GhostConv",
    "ChannelAttention",
    "SpatialAttention",
    "CBAM",
    "Concat",
    "RepConv",
    "Index",
    "DSConv",
)


def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Pad to 'same' shape outputs."""
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
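
# Usage sketch (illustrative, not part of the original module): autopad() returns the
# padding that keeps the output spatial size equal to the input size at stride 1,
# accounting for dilation, e.g.
#     autopad(3)          # -> 1
#     autopad(3, d=2)     # -> 2   (effective kernel size 5)
#     autopad((3, 5))     # -> [1, 2]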


class Conv(nn.Module):
    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation without batch normalization."""
        return self.act(self.conv(x))
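
# Usage sketch (illustrative): a stride-2 Conv halves spatial resolution, e.g.
#     m = Conv(16, 32, k=3, s=2)
#     y = m(torch.randn(1, 16, 64, 64))   # y.shape -> (1, 32, 32, 32)
# forward_fuse() is intended for inference after BatchNorm has been folded into the
# conv weights elsewhere in the framework.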


class Conv2(Conv):
    """Simplified RepConv module with Conv fusing."""

    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__(c1, c2, k, s, p, g=g, d=d, act=act)
        self.cv2 = nn.Conv2d(c1, c2, 1, s, autopad(1, p, d), groups=g, dilation=d, bias=False)  # add 1x1 conv

    def forward(self, x):
        """Apply convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x) + self.cv2(x)))

    def forward_fuse(self, x):
        """Apply fused convolution, batch normalization and activation to input tensor."""
        return self.act(self.bn(self.conv(x)))

    def fuse_convs(self):
        """Fuse parallel convolutions."""
        w = torch.zeros_like(self.conv.weight.data)
        i = [x // 2 for x in w.shape[2:]]
        w[:, :, i[0] : i[0] + 1, i[1] : i[1] + 1] = self.cv2.weight.data.clone()
        self.conv.weight.data += w
        self.__delattr__("cv2")
        self.forward = self.forward_fuse
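
# Usage sketch (illustrative): fuse_convs() folds the parallel 1x1 branch into the
# centre tap of the 3x3 kernel, so fused and unfused outputs should match up to
# floating-point tolerance, e.g.
#     m = Conv2(8, 8).eval()
#     x = torch.randn(1, 8, 32, 32)
#     y1 = m(x)
#     m.fuse_convs()
#     y2 = m(x)
#     torch.allclose(y1, y2, atol=1e-6)   # -> True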


class DSConv(nn.Module):
    """Depthwise Separable Conv with correct autopad for dilation."""

    def __init__(self, c_in, c_out, k=3, s=1, p=None, d=1, bias=False):
        """Initialize DSConv with depthwise and pointwise convolutions, batch normalization and SiLU activation."""
        super().__init__()
        if p is None:
            p = (d * (k - 1)) // 2  # 'same' padding, accounting for dilation
        self.dw = nn.Conv2d(c_in, c_in, kernel_size=k, stride=s, padding=p, dilation=d, groups=c_in, bias=bias)
        self.pw = nn.Conv2d(c_in, c_out, 1, 1, 0, bias=bias)
        self.bn = nn.BatchNorm2d(c_out)
        self.act = nn.SiLU()

    def forward(self, x):
        """Apply depthwise then pointwise convolution, followed by batch normalization and activation."""
        x = self.dw(x)
        x = self.pw(x)
        return self.act(self.bn(x))
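
# Usage sketch (illustrative): the depthwise + pointwise split uses far fewer weights
# than a dense k x k convolution, e.g. for 64 -> 128 channels with k=3:
#     dense Conv2d:  128 * 64 * 3 * 3            = 73,728 weights
#     DSConv:        64 * 1 * 3 * 3 + 128 * 64   =  8,768 weights
#     m = DSConv(64, 128, k=3, d=2)   # dilated; padding defaults to (2 * (3 - 1)) // 2 = 2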


class LightConv(nn.Module):
    """
    Light convolution with args(ch_in, ch_out, kernel).

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
    """

    def __init__(self, c1, c2, k=1, act=nn.ReLU()):
        """Initialize Conv layer with given arguments including activation."""
        super().__init__()
        self.conv1 = Conv(c1, c2, 1, act=False)
        self.conv2 = DWConv(c2, c2, k, act=act)

    def forward(self, x):
        """Apply 2 convolutions to input tensor."""
        return self.conv2(self.conv1(x))


class DWConv(Conv):
    """Depth-wise convolution."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        """Initialize Depth-wise convolution with given parameters."""
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)


class DWConvTranspose2d(nn.ConvTranspose2d):
    """Depth-wise transpose convolution."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
        """Initialize DWConvTranspose2d class with given parameters."""
        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
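
# Usage sketch (illustrative): groups=math.gcd(c1, c2) gives a true depthwise conv when
# c1 == c2 and falls back to a grouped conv otherwise, e.g.
#     DWConv(32, 32, k=3)   # 32 groups, one filter per input channel
#     DWConv(32, 48, k=3)   # gcd(32, 48) = 16 groups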


class ConvTranspose(nn.Module):
    """Convolution transpose 2d layer."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
        """Initialize ConvTranspose2d layer with batch normalization and activation function."""
        super().__init__()
        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
        self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Apply transposed convolution, batch normalization and activation to input."""
        return self.act(self.bn(self.conv_transpose(x)))

    def forward_fuse(self, x):
        """Apply transposed convolution and activation without batch normalization."""
        return self.act(self.conv_transpose(x))


class Focus(nn.Module):
    """Focus wh information into c-space."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """Initialize Focus object with user defined channel, convolution, padding, group and activation values."""
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
        # self.contract = Contract(gain=2)

    def forward(self, x):
        """
        Apply convolution to the concatenated tensor and return the output.

        Input shape is (b,c,w,h); the concatenated tensor has shape (b,4c,w/2,h/2) before the convolution.
        """
        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
        # return self.conv(self.contract(x))
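
# Usage sketch (illustrative): Focus rearranges each 2x2 spatial block into channels
# before the conv, halving width and height, e.g.
#     m = Focus(3, 64, k=3)
#     y = m(torch.randn(1, 3, 640, 640))   # y.shape -> (1, 64, 320, 320)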


class GhostConv(nn.Module):
    """Ghost Convolution https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """Initialize Ghost Convolution module with primary and cheap operations for efficient feature learning."""
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)

    def forward(self, x):
        """Apply the primary convolution and cheap operation, then concatenate their outputs along channels."""
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)
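
# Usage sketch (illustrative): half of the output channels come from the primary conv
# and half from a cheap 5x5 depthwise conv applied to that result, e.g.
#     m = GhostConv(64, 128)
#     y = m(torch.randn(1, 64, 80, 80))   # y.shape -> (1, 128, 80, 80)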


class RepConv(nn.Module):
    """
    RepConv is a basic rep-style block, including training and deploy status.

    This module is used in RT-DETR.
    Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
    """

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
        """Initialize RepConv layer with inputs, outputs and optional activation function."""
        super().__init__()
        assert k == 3 and p == 1
        self.g = g
        self.c1 = c1
        self.c2 = c2
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

        self.bn = nn.BatchNorm2d(num_features=c1) if bn and c2 == c1 and s == 1 else None
        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)

    def forward_fuse(self, x):
        """Forward process."""
        return self.act(self.conv(x))

    def forward(self, x):
        """Forward process."""
        id_out = 0 if self.bn is None else self.bn(x)
        return self.act(self.conv1(x) + self.conv2(x) + id_out)

    def get_equivalent_kernel_bias(self):
        """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
        kernelid, biasid = self._fuse_bn_tensor(self.bn)
        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid

    @staticmethod
    def _pad_1x1_to_3x3_tensor(kernel1x1):
        """Pads a 1x1 tensor to a 3x3 tensor."""
        if kernel1x1 is None:
            return 0
        else:
            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
        if branch is None:
            return 0, 0
        if isinstance(branch, Conv):
            kernel = branch.conv.weight
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        elif isinstance(branch, nn.BatchNorm2d):
            if not hasattr(self, "id_tensor"):
                input_dim = self.c1 // self.g
                kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
                for i in range(self.c1):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def fuse_convs(self):
        """Combines two convolution layers into a single layer and removes unused attributes from the class."""
        if hasattr(self, "conv"):
            return
        kernel, bias = self.get_equivalent_kernel_bias()
        self.conv = nn.Conv2d(
            in_channels=self.conv1.conv.in_channels,
            out_channels=self.conv1.conv.out_channels,
            kernel_size=self.conv1.conv.kernel_size,
            stride=self.conv1.conv.stride,
            padding=self.conv1.conv.padding,
            dilation=self.conv1.conv.dilation,
            groups=self.conv1.conv.groups,
            bias=True,
        ).requires_grad_(False)
        self.conv.weight.data = kernel
        self.conv.bias.data = bias
        for para in self.parameters():
            para.detach_()
        self.__delattr__("conv1")
        self.__delattr__("conv2")
        if hasattr(self, "nm"):
            self.__delattr__("nm")
        if hasattr(self, "bn"):
            self.__delattr__("bn")
        if hasattr(self, "id_tensor"):
            self.__delattr__("id_tensor")
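
# Usage sketch (illustrative): fuse_convs() re-parameterises the 3x3, 1x1 and optional
# identity branches into a single 3x3 conv for deployment; the fused path should match
# the multi-branch forward up to floating-point tolerance, e.g.
#     m = RepConv(16, 16, bn=True).eval()
#     x = torch.randn(1, 16, 40, 40)
#     y1 = m(x)
#     m.fuse_convs()
#     y2 = m.forward_fuse(x)
#     torch.allclose(y1, y2, atol=1e-5)   # -> True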


class ChannelAttention(nn.Module):
    """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""

    def __init__(self, channels: int) -> None:
        """Initializes the class and sets the basic configurations and instance variables required."""
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
        self.act = nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Rescale the input by per-channel weights computed from globally pooled features."""
        return x * self.act(self.fc(self.pool(x)))


class SpatialAttention(nn.Module):
    """Spatial-attention module."""

    def __init__(self, kernel_size=7):
        """Initialize Spatial-attention module with kernel size argument."""
        super().__init__()
        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
        padding = 3 if kernel_size == 7 else 1
        self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.act = nn.Sigmoid()

    def forward(self, x):
        """Apply spatial attention to the input for feature recalibration."""
        return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))


class CBAM(nn.Module):
    """Convolutional Block Attention Module."""

    def __init__(self, c1, kernel_size=7):
        """Initialize CBAM with given input channel (c1) and kernel size."""
        super().__init__()
        self.channel_attention = ChannelAttention(c1)
        self.spatial_attention = SpatialAttention(kernel_size)

    def forward(self, x):
        """Apply channel attention followed by spatial attention to the input tensor."""
        return self.spatial_attention(self.channel_attention(x))
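
# Usage sketch (illustrative): CBAM rescales features without changing their shape, e.g.
#     m = CBAM(256)
#     y = m(torch.randn(1, 256, 20, 20))   # y.shape -> (1, 256, 20, 20)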


class Concat(nn.Module):
    """Concatenate a list of tensors along dimension."""

    def __init__(self, dimension=1):
        """Initialize Concat module with the dimension along which to concatenate."""
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Concatenate the list of input tensors along the configured dimension."""
        return torch.cat(x, self.d)


class Index(nn.Module):
    """Returns a particular index of the input."""

    def __init__(self, c1, c2, index=0):
        """Initialize Index module with the index to select (c1 and c2 are unused)."""
        super().__init__()
        self.index = index

    def forward(self, x):
        """
        Forward pass.

        Expects a list of tensors as input and returns the tensor at self.index.
        """
        return x[self.index]
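
# Usage sketch (illustrative): Concat and Index both operate on lists of feature maps, e.g.
#     feats = [torch.randn(1, 64, 40, 40), torch.randn(1, 128, 40, 40)]
#     Concat(1)(feats).shape                  # -> (1, 192, 40, 40)
#     Index(128, 128, index=1)(feats).shape   # -> (1, 128, 40, 40)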