@@ -0,0 +1,487 @@
+import logging
+import random
+from typing import Any
+
+import cv2
+import numpy as np
+import torch
+from PIL import Image
+from torch import nn, Tensor
+
+from libs.vision_libs import transforms
+from libs.vision_libs.transforms import functional as F
+
+logger = logging.getLogger(__name__)
+
+
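+# Annotation convention used throughout this file (inferred from the code
+# below, not from an external spec): target is a dict whose "boxes" entry is
+# an (N, 4) tensor in (x1, y1, x2, y2) pixel coordinates, and whose optional
+# "lines" entry is a (..., 3) tensor of (x, y, visibility) points, where only
+# x and y are geometric and visibility is carried through unchanged.
+
+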
+class Compose:
+    def __init__(self, transforms):
+        self.transforms = transforms
+
+    def __call__(self, img, target):
+        for t in self.transforms:
+            img, target = t(img, target)
+        return img, target
+
+
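+# Compose usage sketch (a minimal example; the target keys follow the
+# convention above, and unlike torchvision's Compose every transform here
+# takes and returns an (image, target) pair so annotations stay in sync):
+#   tfs = Compose([RandomHorizontalFlip(0.5), ToTensor()])
+#   img, target = tfs(img, {"boxes": boxes, "lines": lines})
+
+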
+class RandomHorizontalFlip:
+    def __init__(self, prob=0.5):
+        self.prob = prob
+
+    def __call__(self, img, target):
+        if random.random() < self.prob:
+            width = img.width if isinstance(img, Image.Image) else img.shape[-1]
+
+            # Flip image
+            img = F.hflip(img)
+
+            # Flip boxes
+            boxes = target["boxes"]
+            x1, y1, x2, y2 = boxes.unbind(dim=1)
+            boxes_flipped = torch.stack((width - x2, y1, width - x1, y2), dim=1)
+            target["boxes"] = boxes_flipped
+
+            # Flip lines
+            if "lines" in target:
+                lines = target["lines"].clone()
+                # Only flip the x coordinate; y and visibility stay unchanged
+                lines[..., 0] = width - lines[..., 0]
+                target["lines"] = lines
+
+        return img, target
+
+
+class RandomVerticalFlip:
+    def __init__(self, prob=0.5):
+        self.prob = prob
+
+    def __call__(self, img, target):
+        if random.random() < self.prob:
+            height = img.height if isinstance(img, Image.Image) else img.shape[-2]
+
+            # Flip image
+            img = F.vflip(img)
+
+            # Flip boxes
+            boxes = target["boxes"]
+            x1, y1, x2, y2 = boxes.unbind(dim=1)
+            boxes_flipped = torch.stack((x1, height - y2, x2, height - y1), dim=1)
+            target["boxes"] = boxes_flipped
+
+            # Flip lines
+            if "lines" in target:
+                lines = target["lines"].clone()
+                lines[..., 1] = height - lines[..., 1]
+                target["lines"] = lines
+
+        return img, target
+
+
+class ColorJitter:
+    def __init__(self, brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2):
+        if not (0 <= hue <= 0.5):
+            raise ValueError(f"Hue jitter value should be in [0, 0.5], but got {hue}")
+
+        self.color_jitter = transforms.ColorJitter(
+            brightness=brightness,
+            contrast=contrast,
+            saturation=saturation,
+            hue=hue
+        )
+
+    def __call__(self, img, target):
+        logger.debug("ColorJitter input type: %s", type(img))
+        img = self.color_jitter(img)
+        return img, target
+
+
+class RandomGrayscale:
+    def __init__(self, p=0.1):
+        self.p = p
+
+    def __call__(self, img, target):
+        if random.random() < self.p:
+            img = F.to_grayscale(img, num_output_channels=3)
+        return img, target
+
+
+class RandomResize:
+    def __init__(self, min_size, max_size=None):
+        self.min_size = min_size
+        self.max_size = max_size
+
+    def __call__(self, img, target):
+        size = random.randint(self.min_size, self.max_size) if self.max_size else self.min_size
+        w, h = img.size if isinstance(img, Image.Image) else (img.shape[2], img.shape[1])
+        scale = size / min(h, w)
+        new_h, new_w = int(scale * h), int(scale * w)
+        img = F.resize(img, (new_h, new_w))
+
+        # Update boxes
+        boxes = target["boxes"]
+        boxes = boxes * scale
+        target["boxes"] = boxes
+
+        # Update lines: scale x and y, keep visibility as-is
+        if "lines" in target:
+            target["lines"] = target["lines"] * torch.tensor([scale, scale, 1], device=target["lines"].device)
+
+        return img, target
+
+
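+# Worked example for the scale above (illustrative numbers): with an input of
+# h=768, w=1024 and a sampled size of 512, scale = 512 / min(768, 1024) = 2/3,
+# so the image resizes to roughly 512x682 and boxes and line endpoints are
+# multiplied by the same factor, keeping annotations aligned with the pixels.
+
+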
+class RandomCrop:
+    def __init__(self, size):
+        self.size = size
+
+    def __call__(self, img, target):
+        w, h = img.size if isinstance(img, Image.Image) else (img.shape[2], img.shape[1])
+        th, tw = self.size
+
+        if h <= th or w <= tw:
+            return img, target
+
+        i = random.randint(0, h - th)
+        j = random.randint(0, w - tw)
+
+        img = F.crop(img, i, j, th, tw)
+
+        # Adjust boxes
+        boxes = target["boxes"]
+        boxes = boxes - torch.tensor([j, i, j, i], device=boxes.device)
+        boxes = torch.clamp(boxes, min=0)
+        xmax, ymax = tw, th
+        boxes[:, [0, 2]] = boxes[:, [0, 2]].clamp(max=xmax)
+        boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(max=ymax)
+        target["boxes"] = boxes
+
+        # Adjust lines
+        if "lines" in target:
+            lines = target["lines"].clone()
+            lines[..., 0] -= j
+            lines[..., 1] -= i
+            lines = torch.clamp(lines, min=0)
+            lines[..., 0] = torch.clamp(lines[..., 0], max=tw)
+            lines[..., 1] = torch.clamp(lines[..., 1], max=th)
+            target["lines"] = lines
+
+        return img, target
+
+
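+# Caveat (behavior of the crop above, worth knowing when training):
+# annotations that fall entirely outside the crop window are clamped to the
+# window border rather than removed, so they survive as zero-area boxes and
+# zero-length lines; a dataset-side filter would be needed to drop them.
+
+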
+class GaussianBlur:
+    def __init__(self, kernel_size=5, sigma=(0.1, 2.0), prob=0.2):
+        self.kernel_size = kernel_size if kernel_size % 2 == 1 else kernel_size + 1  # Kernel size must be odd
+        self.sigma = sigma
+        self.prob = prob
+
+    def __call__(self, img, target):
+        if random.random() < self.prob:
+            # Remember the input type, and convert PIL Images to tensors
+            was_pil = isinstance(img, Image.Image)
+            if was_pil:
+                img = transforms.ToTensor()(img)
+
+            # Apply Gaussian blur with a sigma sampled uniformly from the range
+            img = transforms.GaussianBlur(kernel_size=self.kernel_size, sigma=random.uniform(*self.sigma))(img)
+
+            # If the original image was a PIL Image, convert it back
+            if was_pil:
+                img = transforms.ToPILImage()(img)
+
+        return img, target
+
+
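+# Design note on the blur round-trip above: assuming libs.vision_libs mirrors
+# torchvision, GaussianBlur can also operate on PIL Images directly; the
+# explicit ToTensor/ToPILImage round-trip mainly guarantees that the output
+# type matches the input type, which the rest of the pipeline relies on.
+
+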
+class RandomRotation:
+    def __init__(self, degrees=15, prob=0.5):
+        self.degrees = degrees
+        self.prob = prob
+
+    def rotate_boxes(self, boxes, angle, center):
+        # Convert to numpy for easier rotation math
+        boxes_np = boxes.cpu().numpy()
+        center_np = np.array(center)
+
+        corners = np.array([
+            [boxes_np[:, 0], boxes_np[:, 1]],  # top-left
+            [boxes_np[:, 2], boxes_np[:, 1]],  # top-right
+            [boxes_np[:, 2], boxes_np[:, 3]],  # bottom-right
+            [boxes_np[:, 0], boxes_np[:, 3]]   # bottom-left
+        ]).transpose(2, 0, 1)  # shape: (N, 4, 2)
+
+        # Translate to origin
+        corners -= center_np
+
+        # Rotate points (row vectors, image coordinates with y down)
+        theta = np.radians(angle)
+        c, s = np.cos(theta), np.sin(theta)
+        R = np.array([[c, -s], [s, c]])
+        rotated_corners = corners @ R
+
+        # Translate back
+        rotated_corners += center_np
+
+        # Get new bounding box coordinates
+        x_min = np.min(rotated_corners[:, :, 0], axis=1)
+        y_min = np.min(rotated_corners[:, :, 1], axis=1)
+        x_max = np.max(rotated_corners[:, :, 0], axis=1)
+        y_max = np.max(rotated_corners[:, :, 1], axis=1)
+
+        # Convert back to tensor and move to the same device
+        device = boxes.device
+        return torch.tensor(np.stack([x_min, y_min, x_max, y_max], axis=1), dtype=boxes.dtype, device=device)
+
+    def rotate_lines(self, lines, angle, center):
+        coords = lines[..., :2]      # shape: (..., 2)
+        visibility = lines[..., 2:]  # shape: (..., 1)
+
+        # Translate to origin
+        coords = coords - torch.tensor(center, dtype=coords.dtype, device=coords.device)
+
+        # Rotation matrix
+        theta = torch.deg2rad(torch.tensor(angle))
+        cos_t = torch.cos(theta).item()
+        sin_t = torch.sin(theta).item()
+        R = torch.tensor([[cos_t, -sin_t], [sin_t, cos_t]], dtype=coords.dtype, device=coords.device)
+
+        # Apply rotation to the row-vector coordinates
+        rotated_coords = torch.matmul(coords, R)
+
+        # Translate back
+        rotated_coords = rotated_coords + torch.tensor(center, dtype=coords.dtype, device=coords.device)
+
+        # Concatenate with visibility
+        rotated_lines = torch.cat([rotated_coords, visibility], dim=-1)
+        return rotated_lines
+
+    def __call__(self, img, target):
+        if random.random() < self.prob:
+            angle = random.uniform(-self.degrees, self.degrees)
+            w, h = img.size if isinstance(img, Image.Image) else (img.shape[2], img.shape[1])
+            center = (w / 2, h / 2)
+
+            # Rotate image
+            img = F.rotate(img, angle, center=center)
+
+            # Rotate boxes
+            if "boxes" in target:
+                target["boxes"] = self.rotate_boxes(target["boxes"], angle, center)
+
+            # Rotate lines
+            if "lines" in target:
+                target["lines"] = self.rotate_lines(target["lines"], angle, center)
+
+        return img, target
+
+
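+# Geometry note for RandomRotation (reasoning, not new behavior): in image
+# coordinates y points down, so multiplying row-vector points on the right by
+# [[cos, -sin], [sin, cos]] matches the counter-clockwise rotation that
+# F.rotate applies to the image for a positive angle. The new boxes are
+# axis-aligned hulls of the rotated corners, so they can grow, and corners may
+# leave the canvas since the rotation does not expand the image.
+
+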
+class RandomErasing:
+    def __init__(self, prob=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0.485, 0.456, 0.406]):
+        """
+        :param prob: probability of applying the erasure
+        :param sl: lower bound on the erased-area ratio
+        :param sh: upper bound on the erased-area ratio
+        :param r1: lower bound on the aspect ratio
+        :param mean: pixel values used to fill the erased region
+        """
+        self.prob = prob
+        self.sl = sl
+        self.sh = sh
+        self.r1 = r1
+        self.mean = mean
+
+    def __call__(self, img, target):
+        if random.random() < self.prob:
+            # Tensors are handled directly
+            if isinstance(img, torch.Tensor):
+                img = self._erase_tensor(img)
+            # PIL Images are converted to tensors, erased, then converted back
+            elif isinstance(img, Image.Image):
+                img_tensor = transforms.ToTensor()(img)
+                img_tensor = self._erase_tensor(img_tensor)
+                img = transforms.ToPILImage()(img_tensor)
+
+        return img, target
+
+    def _erase_tensor(self, img_tensor):
+        """
+        Perform random erasing on a tensor image.
+        """
+        img_c, img_h, img_w = img_tensor.shape
+        area = img_h * img_w
+
+        # Sample the size of the erased region
+        erase_area = random.uniform(self.sl, self.sh) * area
+        aspect_ratio = random.uniform(self.r1, 1 / self.r1)
+
+        h = int(round((erase_area * aspect_ratio) ** 0.5))
+        w = int(round((erase_area / aspect_ratio) ** 0.5))
+
+        # Make sure the region stays inside the image bounds
+        if h < img_h and w < img_w:
+            x = random.randint(0, img_w - w)
+            y = random.randint(0, img_h - h)
+
+            # Build a fill region of the requested size holding the mean values
+            mean_tensor = torch.tensor(self.mean).view(img_c, 1, 1).expand(img_c, h, w)
+
+            # Write the region into the original image
+            img_tensor[:, y:y + h, x:x + w] = mean_tensor
+
+        return img_tensor
+
+
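+# Note on RandomErasing defaults: the fill values [0.485, 0.456, 0.406] are
+# the usual ImageNet channel means, so erased patches look neutral after
+# standard normalization; the sampling scheme (area ratio in [sl, sh], aspect
+# ratio in [r1, 1/r1]) follows the common Random Erasing formulation.
+
+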
+# NOTE: has known bugs.
+class RandomPerspective:
+    def __init__(self, distortion_scale=0.5, p=0.5):
+        self.distortion_scale = distortion_scale
+        self.p = p
+
+    def _get_perspective_params(self, width, height, distortion_scale):
+        half_w = width // 2
+        half_h = height // 2
+        w = int(distortion_scale * half_w)
+        h = int(distortion_scale * half_h)
+
+        startpoints = [
+            [0, 0],
+            [width - 1, 0],
+            [width - 1, height - 1],
+            [0, height - 1]
+        ]
+        endpoints = [
+            [random.randint(0, w), random.randint(0, h)],
+            [width - 1 - random.randint(0, w), random.randint(0, h)],
+            [width - 1 - random.randint(0, w), height - 1 - random.randint(0, h)],
+            [random.randint(0, w), height - 1 - random.randint(0, h)]
+        ]
+        return startpoints, endpoints
+
+    def perspective_boxes(self, boxes, M, width, height):
+        # Convert boxes to corner form (on CPU, as numpy)
+        boxes_np = boxes.cpu().numpy()
+        corners = np.array([
+            [boxes_np[:, 0], boxes_np[:, 1]],  # top-left
+            [boxes_np[:, 2], boxes_np[:, 1]],  # top-right
+            [boxes_np[:, 2], boxes_np[:, 3]],  # bottom-right
+            [boxes_np[:, 0], boxes_np[:, 3]]   # bottom-left
+        ]).transpose(2, 0, 1).reshape(-1, 2)  # shape: (N*4, 2)
+
+        # Apply the perspective transform
+        ones = np.ones((corners.shape[0], 1))
+        coords_homogeneous = np.hstack([corners, ones])
+        transformed_coords = (M @ coords_homogeneous.T).T
+        transformed_coords /= transformed_coords[:, 2].reshape(-1, 1)  # homogeneous division
+        transformed_coords = transformed_coords[:, :2]
+
+        # Reassemble into bounding boxes
+        transformed_coords = transformed_coords.reshape(-1, 4, 2)
+        x_min = np.min(transformed_coords[:, :, 0], axis=1)
+        y_min = np.min(transformed_coords[:, :, 1], axis=1)
+        x_max = np.max(transformed_coords[:, :, 0], axis=1)
+        y_max = np.max(transformed_coords[:, :, 1], axis=1)
+
+        # Clip to the image bounds
+        x_min = np.clip(x_min, 0, width)
+        y_min = np.clip(y_min, 0, height)
+        x_max = np.clip(x_max, 0, width)
+        y_max = np.clip(y_max, 0, height)
+
+        return torch.tensor(np.stack([x_min, y_min, x_max, y_max], axis=1), dtype=boxes.dtype, device=boxes.device)
+
+    def perspective_lines(self, lines, M, width, height):
+        # Split coordinates and visibility flags
+        coords = lines[..., :2].cpu().numpy()  # Shape: (N, L, 2)
+        visibility = lines[..., 2:]
+
+        # Flatten so that every point is a row
+        original_shape = coords.shape
+        coords_reshaped = coords.reshape(-1, 2)  # Reshape to (N*L, 2)
+
+        # Add the homogeneous coordinate
+        ones = np.ones((coords_reshaped.shape[0], 1))
+        coords_homogeneous = np.hstack([coords_reshaped, ones])  # Shape: (N*L, 3)
+
+        # Apply the perspective matrix
+        transformed_coords_homogeneous = np.dot(M, coords_homogeneous.T).T
+        transformed_coords = transformed_coords_homogeneous[:, :2] / transformed_coords_homogeneous[:, 2:]  # normalize
+
+        # Restore the original shape
+        transformed_coords = transformed_coords.reshape(original_shape)  # Reshape back to (N, L, 2)
+
+        # Clip to the image bounds
+        transformed_coords = np.clip(transformed_coords, [0, 0], [width, height])
+
+        # Convert back to a tensor
+        transformed_coords = torch.tensor(transformed_coords, dtype=lines.dtype, device=lines.device)
+        return torch.cat([transformed_coords, visibility], dim=-1)
+
+    def __call__(self, img, target):
+        if random.random() < self.p:
+            width, height = img.size if isinstance(img, Image.Image) else (img.shape[2], img.shape[1])
+            startpoints, endpoints = self._get_perspective_params(width, height, self.distortion_scale)
+
+            # Use OpenCV to compute the forward perspective matrix (start -> end)
+            M = cv2.getPerspectiveTransform(
+                np.float32(startpoints),
+                np.float32(endpoints)
+            )
+
+            # PIL's Image.PERSPECTIVE expects the 8 coefficients of the inverse
+            # mapping (output -> input), normalized so that M_inv[2, 2] == 1
+            M_inv = cv2.getPerspectiveTransform(np.float32(endpoints), np.float32(startpoints))
+            coeffs = (M_inv / M_inv[2, 2]).flatten()[:8]
+
+            # Warp the image
+            if isinstance(img, Image.Image):
+                img = img.transform((width, height), Image.PERSPECTIVE, coeffs, resample=Image.BILINEAR)
+            elif isinstance(img, torch.Tensor):
+                # Round-trip through PIL; F.perspective would also work here
+                pil_img = F.to_pil_image(img)
+                pil_img = pil_img.transform((width, height), Image.PERSPECTIVE, coeffs, resample=Image.BILINEAR)
+                img = F.to_tensor(pil_img)
+
+            # Transform boxes with the forward matrix
+            if "boxes" in target:
+                target["boxes"] = self.perspective_boxes(target["boxes"], M, width, height)
+
+            # Transform lines with the forward matrix
+            if "lines" in target:
+                target["lines"] = self.perspective_lines(target["lines"], M, width, height)
+
+        return img, target
+
+
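+# Geometry note for RandomPerspective: annotations move with the image
+# content, so they use the forward matrix M (startpoints -> endpoints), while
+# PIL warps the image by sampling backwards from output to input, which is
+# why the image path above passes the normalized inverse matrix instead.
+
+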
+class DefaultTransform(nn.Module):
+    def forward(self, img: Tensor, target) -> tuple[Tensor, Any]:
+        if not isinstance(img, Tensor):
+            img = F.pil_to_tensor(img)
+        return F.convert_image_dtype(img, torch.float), target
+
+    def __repr__(self) -> str:
+        return self.__class__.__name__ + "()"
+
+    def describe(self) -> str:
+        return (
+            "Accepts ``PIL.Image``, batched ``(B, C, H, W)`` and single ``(C, H, W)`` image ``torch.Tensor`` objects. "
+            "The images are rescaled to ``[0.0, 1.0]``."
+        )
+
+
+class ToTensor:
+    def __call__(self, img, target):
+        img = F.to_tensor(img)
+        return img, target
+
+
+def get_transforms(augmentation=True):
+    transforms_list = []
+
+    if augmentation:
+        transforms_list.append(ColorJitter())
+        transforms_list.append(RandomGrayscale(0.1))
+        transforms_list.append(GaussianBlur())
+        transforms_list.append(RandomErasing())
+        transforms_list.append(RandomHorizontalFlip(0.5))
+        transforms_list.append(RandomVerticalFlip(0.2))
+        # transforms_list.append(RandomPerspective())
+        transforms_list.append(RandomRotation(degrees=15))
+        transforms_list.append(RandomResize(512, 2048))
+        transforms_list.append(RandomCrop((512, 512)))
+
+    transforms_list.append(DefaultTransform())
+
+    return Compose(transforms_list)
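+
+
+# Usage sketch (illustrative; the dataset wiring and variable names here are
+# assumptions, not part of this file):
+#   tfs = get_transforms(augmentation=True)
+#   img, target = tfs(Image.open(path).convert("RGB"),
+#                     {"boxes": boxes, "lines": lines})
+#   # img is now a float tensor in [0, 1]; boxes/lines track every transform.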