|
@@ -0,0 +1,1740 @@
|
|
|
|
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
|
|
|
+"""
|
|
|
|
+Ultralytics Results, Boxes and Masks classes for handling inference results.
|
|
|
|
+
|
|
|
|
+Usage: See https://docs.ultralytics.com/modes/predict/
|
|
|
|
+"""
|
|
|
|
+
|
|
|
|
+from copy import deepcopy
|
|
|
|
+from functools import lru_cache
|
|
|
|
+from pathlib import Path
|
|
|
|
+
|
|
|
|
+import numpy as np
|
|
|
|
+import torch
|
|
|
|
+
|
|
|
|
+from ultralytics.data.augment import LetterBox
|
|
|
|
+from ultralytics.utils import LOGGER, SimpleClass, ops
|
|
|
|
+from ultralytics.utils.checks import check_requirements
|
|
|
|
+from ultralytics.utils.plotting import Annotator, colors, save_one_box
|
|
|
|
+from ultralytics.utils.torch_utils import smart_inference_mode
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class BaseTensor(SimpleClass):
    """
    Base tensor class with additional methods for easy manipulation and device handling.

    The class stores prediction data (a torch tensor or a numpy array) next to the shape of the image the data
    belongs to. Every conversion or indexing method builds its result with `self.__class__(data, orig_shape)`, so
    the returned object has the same class as the instance it was called on.

    Attributes:
        data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
        orig_shape (Tuple[int, int]): Original shape of the image, typically in the format (height, width).

    Methods:
        cpu: Return an instance whose data lives in CPU memory.
        numpy: Return an instance whose data is a numpy array.
        cuda: Return an instance whose data is a torch tensor on the GPU.
        to: Return an instance whose data was passed through `torch.Tensor.to()`.

    Examples:
        >>> import torch
        >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
        >>> orig_shape = (720, 1280)
        >>> base_tensor = BaseTensor(data, orig_shape)
        >>> cpu_tensor = base_tensor.cpu()
        >>> numpy_tensor = base_tensor.numpy()
        >>> gpu_tensor = base_tensor.cuda()
    """

    def __init__(self, data, orig_shape) -> None:
        """
        Initialize BaseTensor with prediction data and the original shape of the image.

        Args:
            data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
            orig_shape (Tuple[int, int]): Original shape of the image in (height, width) format.

        Raises:
            AssertionError: If `data` is neither a torch.Tensor nor a np.ndarray.

        Examples:
            >>> import torch
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> orig_shape = (720, 1280)
            >>> base_tensor = BaseTensor(data, orig_shape)
        """
        assert isinstance(data, (torch.Tensor, np.ndarray)), "data must be torch.Tensor or np.ndarray"
        self.data = data
        self.orig_shape = orig_shape

    @property
    def shape(self):
        """
        Return the shape of the underlying data.

        Returns:
            (torch.Size | Tuple[int, ...]): `self.data.shape`, i.e. a `torch.Size` for tensors and a tuple for
                numpy arrays.

        Examples:
            >>> data = torch.rand(100, 4)
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> print(base_tensor.shape)
            torch.Size([100, 4])
        """
        return self.data.shape

    def cpu(self):
        """
        Return an instance whose data is stored in CPU memory.

        Returns:
            (BaseTensor): `self` when the data is already a numpy array, otherwise a new instance of the same
                class wrapping `self.data.cpu()`.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]).cuda()
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> cpu_tensor = base_tensor.cpu()
            >>> isinstance(cpu_tensor, BaseTensor)
            True
            >>> cpu_tensor.data.device
            device(type='cpu')
        """
        return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)

    def numpy(self):
        """
        Return an instance whose data is a numpy array.

        Returns:
            (BaseTensor): `self` when the data is already a numpy array, otherwise a new instance of the same
                class wrapping the tensor converted to `np.ndarray`.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> orig_shape = (720, 1280)
            >>> base_tensor = BaseTensor(data, orig_shape)
            >>> numpy_tensor = base_tensor.numpy()
            >>> print(type(numpy_tensor.data))
            <class 'numpy.ndarray'>
        """
        if isinstance(self.data, np.ndarray):
            return self
        # Tensor.numpy() only works on CPU tensors; .cpu() returns the tensor itself when it is already there,
        # so going through it lets accelerator tensors convert without changing the CPU path.
        return self.__class__(self.data.cpu().numpy(), self.orig_shape)

    def cuda(self):
        """
        Return an instance whose data is a torch tensor in GPU memory.

        Numpy data is first converted with `torch.as_tensor()` and then moved, so the result always wraps a
        tensor.

        Returns:
            (BaseTensor): A new instance of the same class with the data moved to the GPU.

        Examples:
            >>> import torch
            >>> from ultralytics.engine.results import BaseTensor
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> gpu_tensor = base_tensor.cuda()
            >>> print(gpu_tensor.data.device)
            cuda:0
        """
        return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)

    def to(self, *args, **kwargs):
        """
        Return an instance whose data was converted with `torch.Tensor.to()`.

        Numpy data is first converted with `torch.as_tensor()`, so the result always wraps a tensor.

        Args:
            *args (Any): Variable length argument list to be passed to torch.Tensor.to().
            **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to().

        Returns:
            (BaseTensor): A new instance of the same class with the data on the requested device and/or dtype.

        Examples:
            >>> base_tensor = BaseTensor(torch.randn(3, 4), orig_shape=(480, 640))
            >>> cuda_tensor = base_tensor.to("cuda")
            >>> float16_tensor = base_tensor.to(dtype=torch.float16)
        """
        return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)

    def __len__(self):  # override len(results)
        """
        Return the length of the underlying data.

        Returns:
            (int): `len(self.data)`, the size of the first dimension of the data.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> len(base_tensor)
            2
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Return a new instance containing the indexed elements of the data.

        Args:
            idx (int | List[int] | torch.Tensor): Index or indices forwarded unchanged to `self.data[idx]`.

        Returns:
            (BaseTensor): A new instance of the same class wrapping the indexed data and the same `orig_shape`.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> result = base_tensor[0]  # Select the first row
            >>> print(result.data)
            tensor([1, 2, 3])
        """
        return self.__class__(self.data[idx], self.orig_shape)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+class Results(SimpleClass):
|
|
|
|
+ """
|
|
|
|
+ A class for storing and manipulating inference results.
|
|
|
|
+
|
|
|
|
+ This class encapsulates the functionality for handling detection, segmentation, pose estimation,
|
|
|
|
+ and classification results from YOLO models.
|
|
|
|
+
|
|
|
|
+ Attributes:
|
|
|
|
+ orig_img (numpy.ndarray): Original image as a numpy array.
|
|
|
|
+ orig_shape (Tuple[int, int]): Original image shape in (height, width) format.
|
|
|
|
+ boxes (Boxes | None): Object containing detection bounding boxes.
|
|
|
|
+ masks (Masks | None): Object containing detection masks.
|
|
|
|
+ probs (Probs | None): Object containing class probabilities for classification tasks.
|
|
|
|
+ keypoints (Keypoints | None): Object containing detected keypoints for each object.
|
|
|
|
+ obb (OBB | None): Object containing oriented bounding boxes.
|
|
|
|
+ speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.
|
|
|
|
+ names (Dict[int, str]): Dictionary mapping class IDs to class names.
|
|
|
|
+ path (str): Path to the image file.
|
|
|
|
+ _keys (Tuple[str, ...]): Tuple of attribute names for internal use.
|
|
|
|
+
|
|
|
|
+ Methods:
|
|
|
|
+ update: Updates object attributes with new detection results.
|
|
|
|
+ cpu: Returns a copy of the Results object with all tensors on CPU memory.
|
|
|
|
+ numpy: Returns a copy of the Results object with all tensors as numpy arrays.
|
|
|
|
+ cuda: Returns a copy of the Results object with all tensors on GPU memory.
|
|
|
|
+ to: Returns a copy of the Results object with tensors on a specified device and dtype.
|
|
|
|
+ new: Returns a new Results object with the same image, path, and names.
|
|
|
|
+ plot: Plots detection results on an input image, returning an annotated image.
|
|
|
|
+ show: Shows annotated results on screen.
|
|
|
|
+ save: Saves annotated results to file.
|
|
|
|
+ verbose: Returns a log string for each task, detailing detections and classifications.
|
|
|
|
+ save_txt: Saves detection results to a text file.
|
|
|
|
+ save_crop: Saves cropped detection images.
|
|
|
|
+ tojson: Converts detection results to JSON format.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... print(result.boxes) # Print detection boxes
|
|
|
|
+ ... result.show() # Display the annotated image
|
|
|
|
+ ... result.save(filename="result.jpg") # Save annotated image
|
|
|
|
+ """
|
|
|
|
+
|
|
|
|
+ def __init__(
|
|
|
|
+ self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None, speed=None
|
|
|
|
+ ) -> None:
|
|
|
|
+ """
|
|
|
|
+ Initialize the Results class for storing and manipulating inference results.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ orig_img (numpy.ndarray): The original image as a numpy array.
|
|
|
|
+ path (str): The path to the image file.
|
|
|
|
+ names (Dict): A dictionary of class names.
|
|
|
|
+ boxes (torch.Tensor | None): A 2D tensor of bounding box coordinates for each detection.
|
|
|
|
+ masks (torch.Tensor | None): A 3D tensor of detection masks, where each mask is a binary image.
|
|
|
|
+ probs (torch.Tensor | None): A 1D tensor of probabilities of each class for classification task.
|
|
|
|
+ keypoints (torch.Tensor | None): A 2D tensor of keypoint coordinates for each detection.
|
|
|
|
+ obb (torch.Tensor | None): A 2D tensor of oriented bounding box coordinates for each detection.
|
|
|
|
+ speed (Dict | None): A dictionary containing preprocess, inference, and postprocess speeds (ms/image).
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> result = results[0] # Get the first result
|
|
|
|
+ >>> boxes = result.boxes # Get the boxes for the first result
|
|
|
|
+ >>> masks = result.masks # Get the masks for the first result
|
|
|
|
+
|
|
|
|
+ Notes:
|
|
|
|
+ For the default pose model, keypoint indices for human body pose estimation are:
|
|
|
|
+ 0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear
|
|
|
|
+ 5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow
|
|
|
|
+ 9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip
|
|
|
|
+ 13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle
|
|
|
|
+ """
|
|
|
|
+ self.orig_img = orig_img
|
|
|
|
+ self.orig_shape = orig_img.shape[:2]
|
|
|
|
+ self.boxes = Boxes(boxes, self.orig_shape) if boxes is not None else None # native size boxes
|
|
|
|
+ self.masks = Masks(masks, self.orig_shape) if masks is not None else None # native size or imgsz masks
|
|
|
|
+ self.probs = Probs(probs) if probs is not None else None
|
|
|
|
+ self.keypoints = Keypoints(keypoints, self.orig_shape) if keypoints is not None else None
|
|
|
|
+ self.obb = OBB(obb, self.orig_shape) if obb is not None else None
|
|
|
|
+ self.speed = speed if speed is not None else {"preprocess": None, "inference": None, "postprocess": None}
|
|
|
|
+ self.names = names
|
|
|
|
+ self.path = path
|
|
|
|
+ self.save_dir = None
|
|
|
|
+ self._keys = "boxes", "masks", "probs", "keypoints", "obb"
|
|
|
|
+
|
|
|
|
+ def __getitem__(self, idx):
|
|
|
|
+ """
|
|
|
|
+ Return a Results object for a specific index of inference results.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ idx (int | slice): Index or slice to retrieve from the Results object.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (Results): A new Results object containing the specified subset of inference results.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg") # Perform inference
|
|
|
|
+ >>> single_result = results[0] # Get the first result
|
|
|
|
+ >>> subset_results = results[1:4] # Get a slice of results
|
|
|
|
+ """
|
|
|
|
+ return self._apply("__getitem__", idx)
|
|
|
|
+
|
|
|
|
+ def __len__(self):
|
|
|
|
+ """
|
|
|
|
+ Return the number of detections in the Results object.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (int): The number of detections, determined by the length of the first non-empty attribute
|
|
|
|
+ (boxes, masks, probs, keypoints, or obb).
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4))
|
|
|
|
+ >>> len(results)
|
|
|
|
+ 5
|
|
|
|
+ """
|
|
|
|
+ for k in self._keys:
|
|
|
|
+ v = getattr(self, k)
|
|
|
|
+ if v is not None:
|
|
|
|
+ return len(v)
|
|
|
|
+
|
|
|
|
+ def update(self, boxes=None, masks=None, probs=None, obb=None):
|
|
|
|
+ """
|
|
|
|
+ Updates the Results object with new detection data.
|
|
|
|
+
|
|
|
|
+ This method allows updating the boxes, masks, probabilities, and oriented bounding boxes (OBB) of the
|
|
|
|
+ Results object. It ensures that boxes are clipped to the original image shape.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and
|
|
|
|
+ confidence scores. The format is (x1, y1, x2, y2, conf, class).
|
|
|
|
+ masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks.
|
|
|
|
+ probs (torch.Tensor | None): A tensor of shape (num_classes,) containing class probabilities.
|
|
|
|
+ obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("image.jpg")
|
|
|
|
+ >>> new_boxes = torch.tensor([[100, 100, 200, 200, 0.9, 0]])
|
|
|
|
+ >>> results[0].update(boxes=new_boxes)
|
|
|
|
+ """
|
|
|
|
+ if boxes is not None:
|
|
|
|
+ self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
|
|
|
|
+ if masks is not None:
|
|
|
|
+ self.masks = Masks(masks, self.orig_shape)
|
|
|
|
+ if probs is not None:
|
|
|
|
+ self.probs = probs
|
|
|
|
+ if obb is not None:
|
|
|
|
+ self.obb = OBB(obb, self.orig_shape)
|
|
|
|
+
|
|
|
|
+ def _apply(self, fn, *args, **kwargs):
|
|
|
|
+ """
|
|
|
|
+ Applies a function to all non-empty attributes and returns a new Results object with modified attributes.
|
|
|
|
+
|
|
|
|
+ This method is internally called by methods like .to(), .cuda(), .cpu(), etc.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ fn (str): The name of the function to apply.
|
|
|
|
+ *args (Any): Variable length argument list to pass to the function.
|
|
|
|
+ **kwargs (Any): Arbitrary keyword arguments to pass to the function.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (Results): A new Results object with attributes modified by the applied function.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... result_cuda = result.cuda()
|
|
|
|
+ ... result_cpu = result.cpu()
|
|
|
|
+ """
|
|
|
|
+ r = self.new()
|
|
|
|
+ for k in self._keys:
|
|
|
|
+ v = getattr(self, k)
|
|
|
|
+ if v is not None:
|
|
|
|
+ setattr(r, k, getattr(v, fn)(*args, **kwargs))
|
|
|
|
+ return r
|
|
|
|
+
|
|
|
|
+ def cpu(self):
|
|
|
|
+ """
|
|
|
|
+ Returns a copy of the Results object with all its tensors moved to CPU memory.
|
|
|
|
+
|
|
|
|
+ This method creates a new Results object with all tensor attributes (boxes, masks, probs, keypoints, obb)
|
|
|
|
+ transferred to CPU memory. It's useful for moving data from GPU to CPU for further processing or saving.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (Results): A new Results object with all tensor attributes on CPU memory.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg") # Perform inference
|
|
|
|
+ >>> cpu_result = results[0].cpu() # Move the first result to CPU
|
|
|
|
+ >>> print(cpu_result.boxes.device) # Output: cpu
|
|
|
|
+ """
|
|
|
|
+ return self._apply("cpu")
|
|
|
|
+
|
|
|
|
+ def numpy(self):
|
|
|
|
+ """
|
|
|
|
+ Converts all tensors in the Results object to numpy arrays.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (Results): A new Results object with all tensors converted to numpy arrays.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> numpy_result = results[0].numpy()
|
|
|
|
+ >>> type(numpy_result.boxes.data)
|
|
|
|
+ <class 'numpy.ndarray'>
|
|
|
|
+
|
|
|
|
+ Notes:
|
|
|
|
+ This method creates a new Results object, leaving the original unchanged. It's useful for
|
|
|
|
+ interoperability with numpy-based libraries or when CPU-based operations are required.
|
|
|
|
+ """
|
|
|
|
+ return self._apply("numpy")
|
|
|
|
+
|
|
|
|
+ def cuda(self):
|
|
|
|
+ """
|
|
|
|
+ Moves all tensors in the Results object to GPU memory.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (Results): A new Results object with all tensors moved to CUDA device.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> cuda_results = results[0].cuda() # Move first result to GPU
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... result_cuda = result.cuda() # Move each result to GPU
|
|
|
|
+ """
|
|
|
|
+ return self._apply("cuda")
|
|
|
|
+
|
|
|
|
+ def to(self, *args, **kwargs):
|
|
|
|
+ """
|
|
|
|
+ Moves all tensors in the Results object to the specified device and dtype.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ *args (Any): Variable length argument list to be passed to torch.Tensor.to().
|
|
|
|
+ **kwargs (Any): Arbitrary keyword arguments to be passed to torch.Tensor.to().
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (Results): A new Results object with all tensors moved to the specified device and dtype.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> result_cuda = results[0].to("cuda") # Move first result to GPU
|
|
|
|
+ >>> result_cpu = results[0].to("cpu") # Move first result to CPU
|
|
|
|
+ >>> result_half = results[0].to(dtype=torch.float16) # Convert first result to half precision
|
|
|
|
+ """
|
|
|
|
+ return self._apply("to", *args, **kwargs)
|
|
|
|
+
|
|
|
|
+ def new(self):
|
|
|
|
+ """
|
|
|
|
+ Creates a new Results object with the same image, path, names, and speed attributes.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (Results): A new Results object with copied attributes from the original instance.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> new_result = results[0].new()
|
|
|
|
+ """
|
|
|
|
+ return Results(orig_img=self.orig_img, path=self.path, names=self.names, speed=self.speed)
|
|
|
|
+
|
|
|
|
+ def plot(
|
|
|
|
+ self,
|
|
|
|
+ conf=True,
|
|
|
|
+ line_width=None,
|
|
|
|
+ font_size=None,
|
|
|
|
+ font="Arial.ttf",
|
|
|
|
+ pil=False,
|
|
|
|
+ img=None,
|
|
|
|
+ im_gpu=None,
|
|
|
|
+ kpt_radius=5,
|
|
|
|
+ kpt_line=True,
|
|
|
|
+ labels=True,
|
|
|
|
+ boxes=True,
|
|
|
|
+ masks=True,
|
|
|
|
+ probs=True,
|
|
|
|
+ show=False,
|
|
|
|
+ save=False,
|
|
|
|
+ filename=None,
|
|
|
|
+ color_mode="class",
|
|
|
|
+ ):
|
|
|
|
+ """
|
|
|
|
+ Plots detection results on an input RGB image.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ conf (bool): Whether to plot detection confidence scores.
|
|
|
|
+ line_width (float | None): Line width of bounding boxes. If None, scaled to image size.
|
|
|
|
+ font_size (float | None): Font size for text. If None, scaled to image size.
|
|
|
|
+ font (str): Font to use for text.
|
|
|
|
+ pil (bool): Whether to return the image as a PIL Image.
|
|
|
|
+ img (np.ndarray | None): Image to plot on. If None, uses original image.
|
|
|
|
+ im_gpu (torch.Tensor | None): Normalized image on GPU for faster mask plotting.
|
|
|
|
+ kpt_radius (int): Radius of drawn keypoints.
|
|
|
|
+ kpt_line (bool): Whether to draw lines connecting keypoints.
|
|
|
|
+ labels (bool): Whether to plot labels of bounding boxes.
|
|
|
|
+ boxes (bool): Whether to plot bounding boxes.
|
|
|
|
+ masks (bool): Whether to plot masks.
|
|
|
|
+ probs (bool): Whether to plot classification probabilities.
|
|
|
|
+ show (bool): Whether to display the annotated image.
|
|
|
|
+ save (bool): Whether to save the annotated image.
|
|
|
|
+ filename (str | None): Filename to save image if save is True.
|
|
|
|
+ color_mode (bool): Specify the color mode, e.g., 'instance' or 'class'. Default to 'class'.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (np.ndarray): Annotated image as a numpy array.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("image.jpg")
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... im = result.plot()
|
|
|
|
+ ... im.show()
|
|
|
|
+ """
|
|
|
|
+ assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
|
|
|
|
+ if img is None and isinstance(self.orig_img, torch.Tensor):
|
|
|
|
+ img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()
|
|
|
|
+
|
|
|
|
+ names = self.names
|
|
|
|
+ is_obb = self.obb is not None
|
|
|
|
+ pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
|
|
|
|
+ pred_masks, show_masks = self.masks, masks
|
|
|
|
+ pred_probs, show_probs = self.probs, probs
|
|
|
|
+ annotator = Annotator(
|
|
|
|
+ deepcopy(self.orig_img if img is None else img),
|
|
|
|
+ line_width,
|
|
|
|
+ font_size,
|
|
|
|
+ font,
|
|
|
|
+ pil or (pred_probs is not None and show_probs), # Classify tasks default to pil=True
|
|
|
|
+ example=names,
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Plot Segment results
|
|
|
|
+ if pred_masks and show_masks:
|
|
|
|
+ if im_gpu is None:
|
|
|
|
+ img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
|
|
|
|
+ im_gpu = (
|
|
|
|
+ torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device)
|
|
|
|
+ .permute(2, 0, 1)
|
|
|
|
+ .flip(0)
|
|
|
|
+ .contiguous()
|
|
|
|
+ / 255
|
|
|
|
+ )
|
|
|
|
+ idx = (
|
|
|
|
+ pred_boxes.id
|
|
|
|
+ if pred_boxes.id is not None and color_mode == "instance"
|
|
|
|
+ else pred_boxes.cls
|
|
|
|
+ if pred_boxes and color_mode == "class"
|
|
|
|
+ else reversed(range(len(pred_masks)))
|
|
|
|
+ )
|
|
|
|
+ annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)
|
|
|
|
+
|
|
|
|
+ # Plot Detect results
|
|
|
|
+ if pred_boxes is not None and show_boxes:
|
|
|
|
+ for i, d in enumerate(reversed(pred_boxes)):
|
|
|
|
+ c, d_conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
|
|
|
|
+ name = ("" if id is None else f"id:{id} ") + names[c]
|
|
|
|
+ label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None
|
|
|
|
+ box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
|
|
|
|
+ annotator.box_label(
|
|
|
|
+ box,
|
|
|
|
+ label,
|
|
|
|
+ color=colors(
|
|
|
|
+ c
|
|
|
|
+ if color_mode == "class"
|
|
|
|
+ else id
|
|
|
|
+ if id is not None
|
|
|
|
+ else i
|
|
|
|
+ if color_mode == "instance"
|
|
|
|
+ else None,
|
|
|
|
+ True,
|
|
|
|
+ ),
|
|
|
|
+ rotated=is_obb,
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Plot Classify results
|
|
|
|
+ if pred_probs is not None and show_probs:
|
|
|
|
+ text = ",\n".join(f"{names[j] if names else j} {pred_probs.data[j]:.2f}" for j in pred_probs.top5)
|
|
|
|
+ x = round(self.orig_shape[0] * 0.03)
|
|
|
|
+ annotator.text([x, x], text, txt_color=(255, 255, 255)) # TODO: allow setting colors
|
|
|
|
+
|
|
|
|
+ # Plot Pose results
|
|
|
|
+ if self.keypoints is not None:
|
|
|
|
+ for i, k in enumerate(reversed(self.keypoints.data)):
|
|
|
|
+ annotator.kpts(
|
|
|
|
+ k,
|
|
|
|
+ self.orig_shape,
|
|
|
|
+ radius=kpt_radius,
|
|
|
|
+ kpt_line=kpt_line,
|
|
|
|
+ kpt_color=colors(i, True) if color_mode == "instance" else None,
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ # Show results
|
|
|
|
+ if show:
|
|
|
|
+ annotator.show(self.path)
|
|
|
|
+
|
|
|
|
+ # Save results
|
|
|
|
+ if save:
|
|
|
|
+ annotator.save(filename)
|
|
|
|
+
|
|
|
|
+ return annotator.result()
|
|
|
|
+
|
|
|
|
+ def show(self, *args, **kwargs):
|
|
|
|
+ """
|
|
|
|
+ Display the image with annotated inference results.
|
|
|
|
+
|
|
|
|
+ This method plots the detection results on the original image and displays it. It's a convenient way to
|
|
|
|
+ visualize the model's predictions directly.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ *args (Any): Variable length argument list to be passed to the `plot()` method.
|
|
|
|
+ **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot()` method.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> results[0].show() # Display the first result
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... result.show() # Display all results
|
|
|
|
+ """
|
|
|
|
+ self.plot(show=True, *args, **kwargs)
|
|
|
|
+
|
|
|
|
+ def save(self, filename=None, *args, **kwargs):
|
|
|
|
+ """
|
|
|
|
+ Saves annotated inference results image to file.
|
|
|
|
+
|
|
|
|
+ This method plots the detection results on the original image and saves the annotated image to a file. It
|
|
|
|
+ utilizes the `plot` method to generate the annotated image and then saves it to the specified filename.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ filename (str | Path | None): The filename to save the annotated image. If None, a default filename
|
|
|
|
+ is generated based on the original image path.
|
|
|
|
+ *args (Any): Variable length argument list to be passed to the `plot` method.
|
|
|
|
+ **kwargs (Any): Arbitrary keyword arguments to be passed to the `plot` method.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... result.save("annotated_image.jpg")
|
|
|
|
+ >>> # Or with custom plot arguments
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... result.save("annotated_image.jpg", conf=False, line_width=2)
|
|
|
|
+ """
|
|
|
|
+ if not filename:
|
|
|
|
+ filename = f"results_{Path(self.path).name}"
|
|
|
|
+ self.plot(save=True, filename=filename, *args, **kwargs)
|
|
|
|
+ return filename
|
|
|
|
+
|
|
|
|
+ def verbose(self):
|
|
|
|
+ """
|
|
|
|
+ Returns a log string for each task in the results, detailing detection and classification outcomes.
|
|
|
|
+
|
|
|
|
+ This method generates a human-readable string summarizing the detection and classification results. It includes
|
|
|
|
+ the number of detections for each class and the top probabilities for classification tasks.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (str): A formatted string containing a summary of the results. For detection tasks, it includes the
|
|
|
|
+ number of detections per class. For classification tasks, it includes the top 5 class probabilities.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... print(result.verbose())
|
|
|
|
+ 2 persons, 1 car, 3 traffic lights,
|
|
|
|
+ dog 0.92, cat 0.78, horse 0.64,
|
|
|
|
+
|
|
|
|
+ Notes:
|
|
|
|
+ - If there are no detections, the method returns "(no detections), " for detection tasks.
|
|
|
|
+ - For classification tasks, it returns the top 5 class probabilities and their corresponding class names.
|
|
|
|
+ - The returned string is comma-separated and ends with a comma and a space.
|
|
|
|
+ """
|
|
|
|
+ log_string = ""
|
|
|
|
+ probs = self.probs
|
|
|
|
+ if len(self) == 0:
|
|
|
|
+ return log_string if probs is not None else f"{log_string}(no detections), "
|
|
|
|
+ if probs is not None:
|
|
|
|
+ log_string += f"{', '.join(f'{self.names[j]} {probs.data[j]:.2f}' for j in probs.top5)}, "
|
|
|
|
+ if boxes := self.boxes:
|
|
|
|
+ for c in boxes.cls.unique():
|
|
|
|
+ n = (boxes.cls == c).sum() # detections per class
|
|
|
|
+ log_string += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "
|
|
|
|
+ return log_string
|
|
|
|
+
|
|
|
|
+ def save_txt(self, txt_file, save_conf=False):
|
|
|
|
+ """
|
|
|
|
+ Save detection results to a text file.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ txt_file (str | Path): Path to the output text file.
|
|
|
|
+ save_conf (bool): Whether to include confidence scores in the output.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (str): Path to the saved text file.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> from ultralytics import YOLO
|
|
|
|
+ >>> model = YOLO("yolo11n.pt")
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... result.save_txt("output.txt")
|
|
|
|
+
|
|
|
|
+ Notes:
|
|
|
|
+ - The file will contain one line per detection or classification with the following structure:
|
|
|
|
+ - For detections: `class confidence x_center y_center width height`
|
|
|
|
+ - For classifications: `confidence class_name`
|
|
|
|
+ - For masks and keypoints, the specific formats will vary accordingly.
|
|
|
|
+ - The function will create the output directory if it does not exist.
|
|
|
|
+ - If save_conf is False, the confidence scores will be excluded from the output.
|
|
|
|
+ - Existing contents of the file will not be overwritten; new results will be appended.
|
|
|
|
+ """
|
|
|
|
+ is_obb = self.obb is not None
|
|
|
|
+ boxes = self.obb if is_obb else self.boxes
|
|
|
|
+ masks = self.masks
|
|
|
|
+ probs = self.probs
|
|
|
|
+ kpts = self.keypoints
|
|
|
|
+ texts = []
|
|
|
|
+ if probs is not None:
|
|
|
|
+ # Classify
|
|
|
|
+ [texts.append(f"{probs.data[j]:.2f} {self.names[j]}") for j in probs.top5]
|
|
|
|
+ elif boxes:
|
|
|
|
+ # Detect/segment/pose
|
|
|
|
+ for j, d in enumerate(boxes):
|
|
|
|
+ c, conf, id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
|
|
|
|
+ line = (c, *(d.xyxyxyxyn.view(-1) if is_obb else d.xywhn.view(-1)))
|
|
|
|
+ if masks:
|
|
|
|
+ seg = masks[j].xyn[0].copy().reshape(-1) # reversed mask.xyn, (n,2) to (n*2)
|
|
|
|
+ line = (c, *seg)
|
|
|
|
+ if kpts is not None:
|
|
|
|
+ kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn
|
|
|
|
+ line += (*kpt.reshape(-1).tolist(),)
|
|
|
|
+ line += (conf,) * save_conf + (() if id is None else (id,))
|
|
|
|
+ texts.append(("%g " * len(line)).rstrip() % line)
|
|
|
|
+
|
|
|
|
+ if texts:
|
|
|
|
+ Path(txt_file).parent.mkdir(parents=True, exist_ok=True) # make directory
|
|
|
|
+ with open(txt_file, "a") as f:
|
|
|
|
+ f.writelines(text + "\n" for text in texts)
|
|
|
|
+
|
|
|
|
+ def save_crop(self, save_dir, file_name=Path("im.jpg")):
|
|
|
|
+ """
|
|
|
|
+ Saves cropped detection images to specified directory.
|
|
|
|
+
|
|
|
|
+ This method saves cropped images of detected objects to a specified directory. Each crop is saved in a
|
|
|
|
+ subdirectory named after the object's class, with the filename based on the input file_name.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ save_dir (str | Path): Directory path where cropped images will be saved.
|
|
|
|
+ file_name (str | Path): Base filename for the saved cropped images. Default is Path("im.jpg").
|
|
|
|
+
|
|
|
|
+ Notes:
|
|
|
|
+ - This method does not support Classify or Oriented Bounding Box (OBB) tasks.
|
|
|
|
+ - Crops are saved as 'save_dir/class_name/file_name.jpg'.
|
|
|
|
+ - The method will create necessary subdirectories if they don't exist.
|
|
|
|
+ - Original image is copied before cropping to avoid modifying the original.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> for result in results:
|
|
|
|
+ ... result.save_crop(save_dir="path/to/crops", file_name="detection")
|
|
|
|
+ """
|
|
|
|
+ if self.probs is not None:
|
|
|
|
+ LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
|
|
|
|
+ return
|
|
|
|
+ if self.obb is not None:
|
|
|
|
+ LOGGER.warning("WARNING ⚠️ OBB task do not support `save_crop`.")
|
|
|
|
+ return
|
|
|
|
+ for d in self.boxes:
|
|
|
|
+ save_one_box(
|
|
|
|
+ d.xyxy,
|
|
|
|
+ self.orig_img.copy(),
|
|
|
|
+ file=Path(save_dir) / self.names[int(d.cls)] / Path(file_name).with_suffix(".jpg"),
|
|
|
|
+ BGR=True,
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+ def summary(self, normalize=False, decimals=5):
|
|
|
|
+ """
|
|
|
|
+ Converts inference results to a summarized dictionary with optional normalization for box coordinates.
|
|
|
|
+
|
|
|
|
+ This method creates a list of detection dictionaries, each containing information about a single
|
|
|
|
+ detection or classification result. For classification tasks, it returns the top class and its
|
|
|
|
+ confidence. For detection tasks, it includes class information, bounding box coordinates, and
|
|
|
|
+ optionally mask segments and keypoints.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ normalize (bool): Whether to normalize bounding box coordinates by image dimensions. Defaults to False.
|
|
|
|
+ decimals (int): Number of decimal places to round the output values to. Defaults to 5.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (List[Dict]): A list of dictionaries, each containing summarized information for a single
|
|
|
|
+ detection or classification result. The structure of each dictionary varies based on the
|
|
|
|
+ task type (classification or detection) and available information (boxes, masks, keypoints).
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("image.jpg")
|
|
|
|
+ >>> summary = results[0].summary()
|
|
|
|
+ >>> print(summary)
|
|
|
|
+ """
|
|
|
|
+ # Create list of detection dictionaries
|
|
|
|
+ results = []
|
|
|
|
+ if self.probs is not None:
|
|
|
|
+ class_id = self.probs.top1
|
|
|
|
+ results.append(
|
|
|
|
+ {
|
|
|
|
+ "name": self.names[class_id],
|
|
|
|
+ "class": class_id,
|
|
|
|
+ "confidence": round(self.probs.top1conf.item(), decimals),
|
|
|
|
+ }
|
|
|
|
+ )
|
|
|
|
+ return results
|
|
|
|
+
|
|
|
|
+ is_obb = self.obb is not None
|
|
|
|
+ data = self.obb if is_obb else self.boxes
|
|
|
|
+ h, w = self.orig_shape if normalize else (1, 1)
|
|
|
|
+ for i, row in enumerate(data): # xyxy, track_id if tracking, conf, class_id
|
|
|
|
+ class_id, conf = int(row.cls), round(row.conf.item(), decimals)
|
|
|
|
+ box = (row.xyxyxyxy if is_obb else row.xyxy).squeeze().reshape(-1, 2).tolist()
|
|
|
|
+ xy = {}
|
|
|
|
+ for j, b in enumerate(box):
|
|
|
|
+ xy[f"x{j + 1}"] = round(b[0] / w, decimals)
|
|
|
|
+ xy[f"y{j + 1}"] = round(b[1] / h, decimals)
|
|
|
|
+ result = {"name": self.names[class_id], "class": class_id, "confidence": conf, "box": xy}
|
|
|
|
+ if data.is_track:
|
|
|
|
+ result["track_id"] = int(row.id.item()) # track ID
|
|
|
|
+ if self.masks:
|
|
|
|
+ result["segments"] = {
|
|
|
|
+ "x": (self.masks.xy[i][:, 0] / w).round(decimals).tolist(),
|
|
|
|
+ "y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(),
|
|
|
|
+ }
|
|
|
|
+ if self.keypoints is not None:
|
|
|
|
+ x, y, visible = self.keypoints[i].data[0].cpu().unbind(dim=1) # torch Tensor
|
|
|
|
+ result["keypoints"] = {
|
|
|
|
+ "x": (x / w).numpy().round(decimals).tolist(), # decimals named argument required
|
|
|
|
+ "y": (y / h).numpy().round(decimals).tolist(),
|
|
|
|
+ "visible": visible.numpy().round(decimals).tolist(),
|
|
|
|
+ }
|
|
|
|
+ results.append(result)
|
|
|
|
+
|
|
|
|
+ return results
|
|
|
|
+
|
|
|
|
+ def to_df(self, normalize=False, decimals=5):
|
|
|
|
+ """
|
|
|
|
+ Converts detection results to a Pandas Dataframe.
|
|
|
|
+
|
|
|
|
+ This method converts the detection results into Pandas Dataframe format. It includes information
|
|
|
|
+ about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
|
|
+ segmentation masks and keypoints.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
|
|
+ If True, coordinates will be returned as float values between 0 and 1. Defaults to False.
|
|
|
|
+ decimals (int): Number of decimal places to round the output values to. Defaults to 5.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (DataFrame): A Pandas Dataframe containing all the information in results in an organized way.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> df_result = results[0].to_df()
|
|
|
|
+ >>> print(df_result)
|
|
|
|
+ """
|
|
|
|
+ import pandas as pd # scope for faster 'import ultralytics'
|
|
|
|
+
|
|
|
|
+ return pd.DataFrame(self.summary(normalize=normalize, decimals=decimals))
|
|
|
|
+
|
|
|
|
+ def to_csv(self, normalize=False, decimals=5, *args, **kwargs):
|
|
|
|
+ """
|
|
|
|
+ Converts detection results to a CSV format.
|
|
|
|
+
|
|
|
|
+ This method serializes the detection results into a CSV format. It includes information
|
|
|
|
+ about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
|
|
+ segmentation masks and keypoints.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
|
|
+ If True, coordinates will be returned as float values between 0 and 1. Defaults to False.
|
|
|
|
+ decimals (int): Number of decimal places to round the output values to. Defaults to 5.
|
|
|
|
+ *args (Any): Variable length argument list to be passed to pandas.DataFrame.to_csv().
|
|
|
|
+ **kwargs (Any): Arbitrary keyword arguments to be passed to pandas.DataFrame.to_csv().
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (str): CSV containing all the information in results in an organized way.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> csv_result = results[0].to_csv()
|
|
|
|
+ >>> print(csv_result)
|
|
|
|
+ """
|
|
|
|
+ return self.to_df(normalize=normalize, decimals=decimals).to_csv(*args, **kwargs)
|
|
|
|
+
|
|
|
|
+ def to_xml(self, normalize=False, decimals=5, *args, **kwargs):
|
|
|
|
+ """
|
|
|
|
+ Converts detection results to XML format.
|
|
|
|
+
|
|
|
|
+ This method serializes the detection results into an XML format. It includes information
|
|
|
|
+ about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
|
|
+ segmentation masks and keypoints.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
|
|
+ If True, coordinates will be returned as float values between 0 and 1. Defaults to False.
|
|
|
|
+ decimals (int): Number of decimal places to round the output values to. Defaults to 5.
|
|
|
|
+ *args (Any): Variable length argument list to be passed to pandas.DataFrame.to_xml().
|
|
|
|
+ **kwargs (Any): Arbitrary keyword arguments to be passed to pandas.DataFrame.to_xml().
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (str): An XML string containing all the information in results in an organized way.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> xml_result = results[0].to_xml()
|
|
|
|
+ >>> print(xml_result)
|
|
|
|
+ """
|
|
|
|
+ check_requirements("lxml")
|
|
|
|
+ df = self.to_df(normalize=normalize, decimals=decimals)
|
|
|
|
+ return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>' if df.empty else df.to_xml(*args, **kwargs)
|
|
|
|
+
|
|
|
|
+ def tojson(self, normalize=False, decimals=5):
|
|
|
|
+ """Deprecated version of to_json()."""
|
|
|
|
+ LOGGER.warning("WARNING ⚠️ 'result.tojson()' is deprecated, replace with 'result.to_json()'.")
|
|
|
|
+ return self.to_json(normalize, decimals)
|
|
|
|
+
|
|
|
|
+ def to_json(self, normalize=False, decimals=5):
|
|
|
|
+ """
|
|
|
|
+ Converts detection results to JSON format.
|
|
|
|
+
|
|
|
|
+ This method serializes the detection results into a JSON-compatible format. It includes information
|
|
|
|
+ about detected objects such as bounding boxes, class names, confidence scores, and optionally
|
|
|
|
+ segmentation masks and keypoints.
|
|
|
|
+
|
|
|
|
+ Args:
|
|
|
|
+ normalize (bool): Whether to normalize the bounding box coordinates by the image dimensions.
|
|
|
|
+ If True, coordinates will be returned as float values between 0 and 1. Defaults to False.
|
|
|
|
+ decimals (int): Number of decimal places to round the output values to. Defaults to 5.
|
|
|
|
+
|
|
|
|
+ Returns:
|
|
|
|
+ (str): A JSON string containing the serialized detection results.
|
|
|
|
+
|
|
|
|
+ Examples:
|
|
|
|
+ >>> results = model("path/to/image.jpg")
|
|
|
|
+ >>> json_result = results[0].to_json()
|
|
|
|
+ >>> print(json_result)
|
|
|
|
+
|
|
|
|
+ Notes:
|
|
|
|
+ - For classification tasks, the JSON will contain class probabilities instead of bounding boxes.
|
|
|
|
+ - For object detection tasks, the JSON will include bounding box coordinates, class names, and
|
|
|
|
+ confidence scores.
|
|
|
|
+ - If available, segmentation masks and keypoints will also be included in the JSON output.
|
|
|
|
+ - The method uses the `summary` method internally to generate the data structure before
|
|
|
|
+ converting it to JSON.
|
|
|
|
+ """
|
|
|
|
+ import json
|
|
|
|
+
|
|
|
|
+ return json.dumps(self.summary(normalize=normalize, decimals=decimals), indent=2)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class Boxes(BaseTensor):
    """
    Container for axis-aligned detection boxes with their scores, classes and optional track IDs.

    The underlying data holds one row per box laid out as [x1, y1, x2, y2, (track_id), conf, cls]. The
    properties of this class slice that data or convert the corner coordinates into other box formats,
    either in pixels or normalized by the original image size.

    Attributes:
        data (torch.Tensor | numpy.ndarray): Raw box data with 6 or 7 columns per row.
        orig_shape (Tuple[int, int]): Original image size as (height, width).
        is_track (bool): True when the data has 7 columns, i.e. a track ID column is present.
        xyxy (torch.Tensor | numpy.ndarray): Boxes as [x1, y1, x2, y2].
        conf (torch.Tensor | numpy.ndarray): Confidence score of each box.
        cls (torch.Tensor | numpy.ndarray): Class label of each box.
        id (torch.Tensor | numpy.ndarray | None): Track ID of each box, or None without tracking.
        xywh (torch.Tensor | numpy.ndarray): Boxes as [x_center, y_center, width, height].
        xyxyn (torch.Tensor | numpy.ndarray): `xyxy` divided by the image width/height.
        xywhn (torch.Tensor | numpy.ndarray): `xywh` divided by the image width/height.

    Methods:
        cpu(): Returns a copy of the object with all tensors on CPU memory.
        numpy(): Returns a copy of the object with all tensors as numpy arrays.
        cuda(): Returns a copy of the object with all tensors on GPU memory.
        to(*args, **kwargs): Returns a copy of the object with tensors on specified device and dtype.

    Examples:
        >>> import torch
        >>> boxes_data = torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]])
        >>> orig_shape = (480, 640)  # height, width
        >>> boxes = Boxes(boxes_data, orig_shape)
        >>> print(boxes.xyxy)
        >>> print(boxes.conf)
        >>> print(boxes.cls)
        >>> print(boxes.xywhn)
    """

    def __init__(self, boxes, orig_shape) -> None:
        """
        Store the box data and remember whether it carries track IDs.

        A single box given as a 1D array is promoted to a batch of one. The number of columns decides
        whether tracking information is present.

        Args:
            boxes (torch.Tensor | np.ndarray): Box data of shape (num_boxes, 6) or (num_boxes, 7), or a
                single row of 6 or 7 values. Columns are [x1, y1, x2, y2, (track_id), confidence, class].
            orig_shape (Tuple[int, int]): The original image shape as (height, width). Used for normalization.

        Raises:
            AssertionError: If the last dimension of `boxes` is neither 6 nor 7.

        Examples:
            >>> import torch
            >>> boxes = torch.tensor([[100, 50, 150, 100, 0.9, 0]])
            >>> orig_shape = (480, 640)
            >>> detection_boxes = Boxes(boxes, orig_shape)
            >>> print(detection_boxes.xyxy)
            tensor([[100.,  50., 150., 100.]])
        """
        if boxes.ndim == 1:
            boxes = boxes[None, :]  # single box -> (1, n)
        num_cols = boxes.shape[-1]
        assert num_cols in {6, 7}, f"expected 6 or 7 values but got {num_cols}"  # xyxy, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = num_cols == 7
        self.orig_shape = orig_shape

    @property
    def xyxy(self):
        """
        Return the corner coordinates of every box.

        Returns:
            (torch.Tensor | numpy.ndarray): The first four columns of the data, shape (n, 4), as
                [x1, y1, x2, y2] where n is the number of boxes.

        Examples:
            >>> results = model("image.jpg")
            >>> boxes = results[0].boxes
            >>> xyxy = boxes.xyxy
            >>> print(xyxy)
        """
        return self.data[:, :4]

    @property
    def conf(self):
        """
        Return the confidence score of every box.

        Returns:
            (torch.Tensor | numpy.ndarray): The second-to-last column of the data, shape (N,), where N is
                the number of boxes.

        Examples:
            >>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100))
            >>> conf_scores = boxes.conf
            >>> print(conf_scores)
            tensor([0.9000])
        """
        return self.data[:, -2]

    @property
    def cls(self):
        """
        Return the class label of every box.

        Returns:
            (torch.Tensor | numpy.ndarray): The last column of the data, shape (N,), where N is the
                number of boxes.

        Examples:
            >>> results = model("image.jpg")
            >>> boxes = results[0].boxes
            >>> class_ids = boxes.cls
            >>> print(class_ids)  # tensor([0., 2., 1.])
        """
        return self.data[:, -1]

    @property
    def id(self):
        """
        Return the track ID of every box, if the data contains one.

        Returns:
            (torch.Tensor | numpy.ndarray | None): The third-to-last column of the data, shape (N,), when
                `is_track` is True; otherwise None.

        Examples:
            >>> results = model.track("path/to/video.mp4")
            >>> for result in results:
            ...     boxes = result.boxes
            ...     if boxes.is_track:
            ...         track_ids = boxes.id
            ...         print(f"Tracking IDs: {track_ids}")
            ...     else:
            ...         print("Tracking is not enabled for these boxes.")
        """
        if not self.is_track:
            return None
        return self.data[:, -3]

    # NOTE(review): lru_cache on a method keys the cache on `self`, so it is shared by all instances of
    # the class and keeps the most recently used instances referenced.
    @property
    @lru_cache(maxsize=2)  # maxsize 1 should suffice
    def xywh(self):
        """
        Return the boxes converted from corner format to center/size format.

        Returns:
            (torch.Tensor | numpy.ndarray): Boxes of shape (N, 4) as [x_center, y_center, width, height],
                in the same units as `xyxy`.

        Examples:
            >>> data = torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]])
            >>> boxes = Boxes(data, orig_shape=(480, 640))
            >>> xywh = boxes.xywh
            >>> print(xywh)
            tensor([[125.,  75.,  50.,  50.],
                    [250., 200., 100., 100.]])
        """
        corners = self.xyxy
        return ops.xyxy2xywh(corners)

    @property
    @lru_cache(maxsize=2)
    def xyxyn(self):
        """
        Return the corner coordinates divided by the original image size.

        A copy of `xyxy` is made first, then x values are divided by the image width and y values by the
        image height, so the stored data is left untouched.

        Returns:
            (torch.Tensor | numpy.ndarray): Boxes of shape (N, 4) as [x1, y1, x2, y2] relative to
                `orig_shape`.

        Examples:
            >>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640))
            >>> normalized = boxes.xyxyn
            >>> print(normalized)
            tensor([[0.1562, 0.1042, 0.4688, 0.8333]])
        """
        if isinstance(self.xyxy, torch.Tensor):
            normalized = self.xyxy.clone()
        else:
            normalized = np.copy(self.xyxy)
        normalized[..., [0, 2]] /= self.orig_shape[1]  # x1, x2 by width
        normalized[..., [1, 3]] /= self.orig_shape[0]  # y1, y2 by height
        return normalized

    @property
    @lru_cache(maxsize=2)
    def xywhn(self):
        """
        Return the center/size boxes divided by the original image size.

        The boxes are converted with `ops.xyxy2xywh`, then x center and width are divided by the image
        width and y center and height by the image height.

        Returns:
            (torch.Tensor | numpy.ndarray): Boxes of shape (N, 4) as [x_center, y_center, width, height]
                relative to `orig_shape`.

        Examples:
            >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640))
            >>> normalized = boxes.xywhn
            >>> print(normalized)
            tensor([[0.1953, 0.1562, 0.0781, 0.1042]])
        """
        normalized = ops.xyxy2xywh(self.xyxy)
        normalized[..., [0, 2]] /= self.orig_shape[1]  # x center, width by image width
        normalized[..., [1, 3]] /= self.orig_shape[0]  # y center, height by image height
        return normalized
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class Masks(BaseTensor):
    """
    Container for segmentation masks of the detections in one image.

    Besides the raw mask data inherited from BaseTensor, the class exposes the mask outlines as lists of
    coordinate arrays, either in pixels of the original image or normalized by its size.

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw mask data, one mask per leading index.
        orig_shape (tuple): Original image shape in (height, width) format.
        xy (List[numpy.ndarray]): One array of segment coordinates per mask, in pixel coordinates.
        xyn (List[numpy.ndarray]): One array of segment coordinates per mask, normalized.

    Methods:
        cpu(): Returns a copy of the Masks object with the mask tensor on CPU memory.
        numpy(): Returns a copy of the Masks object with the mask tensor as a numpy array.
        cuda(): Returns a copy of the Masks object with the mask tensor on GPU memory.
        to(*args, **kwargs): Returns a copy of the Masks object with the mask tensor on specified device and dtype.

    Examples:
        >>> masks_data = torch.rand(1, 160, 160)
        >>> orig_shape = (720, 1280)
        >>> masks = Masks(masks_data, orig_shape)
        >>> pixel_coords = masks.xy
        >>> normalized_coords = masks.xyn
    """

    def __init__(self, masks, orig_shape) -> None:
        """
        Store the mask data together with the original image shape.

        A single 2D mask is promoted to a batch of one mask.

        Args:
            masks (torch.Tensor | np.ndarray): Masks with shape (num_masks, height, width), or a single
                mask with shape (height, width).
            orig_shape (tuple): The original image shape as (height, width). Used for normalization.

        Examples:
            >>> import torch
            >>> from ultralytics.engine.results import Masks
            >>> masks = torch.rand(10, 160, 160)  # 10 masks of 160x160 resolution
            >>> orig_shape = (720, 1280)  # Original image shape
            >>> mask_obj = Masks(masks, orig_shape)
        """
        if masks.ndim == 2:
            masks = masks[None, :]  # single mask -> (1, height, width)
        super().__init__(masks, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """
        Return the mask segments in coordinates normalized by the original image size.

        Each mask is turned into a segment with `ops.masks2segments` and rescaled from the mask resolution
        to `orig_shape` with `ops.scale_coords(..., normalize=True)`.

        Returns:
            (List[numpy.ndarray]): One array per mask holding the normalized xy-coordinates of its
                segment. Each array has shape (N, 2), where N is the number of points in the segment.

        Examples:
            >>> results = model("image.jpg")
            >>> masks = results[0].masks
            >>> normalized_coords = masks.xyn
            >>> print(normalized_coords[0])  # Normalized coordinates of the first mask
        """
        segments = ops.masks2segments(self.data)
        normalized = []
        for segment in segments:
            mask_hw = self.data.shape[1:]  # resolution the masks were predicted at
            normalized.append(ops.scale_coords(mask_hw, segment, self.orig_shape, normalize=True))
        return normalized

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """
        Return the mask segments in pixel coordinates of the original image.

        Each mask is turned into a segment with `ops.masks2segments` and rescaled from the mask resolution
        to `orig_shape` with `ops.scale_coords(..., normalize=False)`.

        Returns:
            (List[numpy.ndarray]): One array per mask holding the [x, y] pixel coordinates of its
                segment. Each array has shape (N, 2), where N is the number of points in the segment.

        Examples:
            >>> results = model("image.jpg")
            >>> masks = results[0].masks
            >>> xy_coords = masks.xy
            >>> print(len(xy_coords))  # Number of masks
            >>> print(xy_coords[0].shape)  # Shape of first mask's coordinates
        """
        segments = ops.masks2segments(self.data)
        scaled = []
        for segment in segments:
            mask_hw = self.data.shape[1:]  # resolution the masks were predicted at
            scaled.append(ops.scale_coords(mask_hw, segment, self.orig_shape, normalize=False))
        return scaled
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class Keypoints(BaseTensor):
    """
    Container for the keypoints of the detections in one image.

    The data holds, per detection, a list of keypoints given as (x, y) or (x, y, confidence). The
    properties expose the coordinates in pixels, the coordinates normalized by the original image size,
    and the per-keypoint confidence when it is present.

    Attributes:
        data (torch.Tensor): The raw tensor containing keypoint data.
        orig_shape (Tuple[int, int]): The original image dimensions (height, width).
        has_visible (bool): True when the data has a third (confidence) value per keypoint.
        xy (torch.Tensor): Keypoint coordinates in [x, y] format.
        xyn (torch.Tensor): Keypoint coordinates in [x, y] format, divided by the image width/height.
        conf (torch.Tensor | None): Confidence value of each keypoint, or None if not available.

    Methods:
        cpu(): Returns a copy of the keypoints tensor on CPU memory.
        numpy(): Returns a copy of the keypoints tensor as a numpy array.
        cuda(): Returns a copy of the keypoints tensor on GPU memory.
        to(*args, **kwargs): Returns a copy of the keypoints tensor with specified device and dtype.

    Examples:
        >>> import torch
        >>> from ultralytics.engine.results import Keypoints
        >>> keypoints_data = torch.rand(1, 17, 3)  # 1 detection, 17 keypoints, (x, y, conf)
        >>> orig_shape = (480, 640)  # Original image shape (height, width)
        >>> keypoints = Keypoints(keypoints_data, orig_shape)
        >>> print(keypoints.xy.shape)  # Access xy coordinates
        >>> print(keypoints.conf)  # Access confidence values
        >>> keypoints_cpu = keypoints.cpu()  # Move keypoints to CPU
    """

    @smart_inference_mode()  # avoid keypoints < conf in-place error
    def __init__(self, keypoints, orig_shape) -> None:
        """
        Store the keypoint data and zero the coordinates of low-confidence keypoints.

        A 2D input (the keypoints of a single detection) is promoted to a batch of one. When every
        keypoint has three values (x, y, confidence), the x and y of all keypoints whose confidence is
        below 0.5 are set to zero.

        Args:
            keypoints (torch.Tensor): A tensor containing keypoint data. Shape can be either:
                - (num_objects, num_keypoints, 2) for x, y coordinates only
                - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores
            orig_shape (Tuple[int, int]): The original image dimensions (height, width).

        Notes:
            The zeroing is done in place on the array that is passed in, so the caller's data is modified
            as well.

        Examples:
            >>> kpts = torch.rand(1, 17, 3)  # 1 object, 17 keypoints (COCO format), x,y,conf
            >>> orig_shape = (720, 1280)  # Original image height, width
            >>> keypoints = Keypoints(kpts, orig_shape)
        """
        if keypoints.ndim == 2:
            keypoints = keypoints[None, :]  # single detection -> (1, num_keypoints, dims)
        has_conf = keypoints.shape[2] == 3  # x, y, conf
        if has_conf:
            hidden = keypoints[..., 2] < 0.5  # points with conf < 0.5 (not visible)
            keypoints[..., :2][hidden] = 0
        super().__init__(keypoints, orig_shape)
        self.has_visible = self.data.shape[-1] == 3

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """
        Return the x, y coordinates of all keypoints.

        Returns:
            (torch.Tensor): The first two values of every keypoint, shape (N, K, 2), where N is the
                number of detections and K is the number of keypoints per detection.

        Examples:
            >>> results = model("image.jpg")
            >>> keypoints = results[0].keypoints
            >>> xy = keypoints.xy
            >>> print(xy.shape)  # (N, K, 2)
            >>> print(xy[0])  # x, y coordinates of keypoints for first detection

        Notes:
            - If the keypoints were created with confidence values, keypoints with a confidence below 0.5
              are still present but have their coordinates set to zero.
            - The result is wrapped in an LRU cache.
        """
        return self.data[..., :2]

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """
        Return the x, y coordinates of all keypoints divided by the original image size.

        A copy of `xy` is made first, then x is divided by the image width and y by the image height, so
        the stored data is left untouched.

        Returns:
            (torch.Tensor | numpy.ndarray): Coordinates of shape (N, K, 2), where N is the number of
                detections and K is the number of keypoints, relative to `orig_shape`.

        Examples:
            >>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640))
            >>> normalized_kpts = keypoints.xyn
            >>> print(normalized_kpts.shape)
            torch.Size([1, 17, 2])
        """
        if isinstance(self.xy, torch.Tensor):
            normalized = self.xy.clone()
        else:
            normalized = np.copy(self.xy)
        normalized[..., 0] /= self.orig_shape[1]  # x by width
        normalized[..., 1] /= self.orig_shape[0]  # y by height
        return normalized

    @property
    @lru_cache(maxsize=1)
    def conf(self):
        """
        Return the confidence value of every keypoint, if the data contains one.

        Returns:
            (torch.Tensor | None): The third value of every keypoint, shape (num_detections,
                num_keypoints), when `has_visible` is True; otherwise None.

        Examples:
            >>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640))  # 1 detection, 17 keypoints
            >>> conf = keypoints.conf
            >>> print(conf.shape)  # torch.Size([1, 17])
        """
        if not self.has_visible:
            return None
        return self.data[..., 2]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class Probs(BaseTensor):
    """
    A class for storing and manipulating classification probabilities.

    This class extends BaseTensor and exposes the top-1 and top-5 predictions, together with their confidence
    scores, as lazily computed properties. Every derived value is memoized on the instance that produced it, so
    repeated access is cheap and no class-level cache keeps an instance (or its tensor) alive.

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw tensor or array containing classification probabilities.
        orig_shape (tuple | None): The original image shape as (height, width). Not used in this class.
        top1 (int): Index of the class with the highest probability.
        top5 (List[int]): Indices of the (up to) 5 most probable classes, most probable first.
        top1conf (torch.Tensor | numpy.ndarray): Confidence score of the top 1 class.
        top5conf (torch.Tensor | numpy.ndarray): Confidence scores of the top 5 classes.

    Methods:
        cpu(): Returns a copy of the probabilities tensor on CPU memory.
        numpy(): Returns a copy of the probabilities tensor as a numpy array.
        cuda(): Returns a copy of the probabilities tensor on GPU memory.
        to(*args, **kwargs): Returns a copy of the probabilities tensor with specified device and dtype.

    Examples:
        >>> probs = torch.tensor([0.1, 0.3, 0.6])
        >>> p = Probs(probs)
        >>> print(p.top1)
        2
        >>> print(p.top5)
        [2, 1, 0]
        >>> print(p.top1conf)
        tensor(0.6000)
        >>> print(p.top5conf)
        tensor([0.6000, 0.3000, 0.1000])
    """

    def __init__(self, probs, orig_shape=None) -> None:
        """
        Initialize the Probs class with classification probabilities.

        Args:
            probs (torch.Tensor | np.ndarray): A 1D tensor or array of classification probabilities.
            orig_shape (tuple | None): The original image shape as (height, width). Not used in this class but kept
                for consistency with other result classes.

        Examples:
            >>> import torch
            >>> probs = torch.tensor([0.1, 0.3, 0.2, 0.4])
            >>> p = Probs(probs)
            >>> print(p.top1)
            3
            >>> print(p.top1conf)
            tensor(0.4000)
            >>> print(p.top5)
            [3, 1, 2, 0]
        """
        super().__init__(probs, orig_shape)

    def _cached(self, key, compute):
        """
        Return the value stored under `key` for this instance, computing it with `compute()` on first use.

        The values live in a dict inside the instance's own `__dict__`, so they are released together with the
        instance. A class-level `lru_cache` keyed on `self` would instead retain the last instance it saw and
        would be evicted whenever a different instance is queried.

        Args:
            key (str): Name identifying the memoized value.
            compute (Callable[[], Any]): Zero-argument callable producing the value.

        Returns:
            (Any): The memoized result of `compute()`.
        """
        store = vars(self).setdefault("_probs_cache", {})
        if key not in store:
            store[key] = compute()
        return store[key]

    @property
    def top1(self):
        """
        Returns the index of the class with the highest probability.

        Returns:
            (int): Index of the class with the highest probability.

        Examples:
            >>> probs = Probs(torch.tensor([0.1, 0.3, 0.6]))
            >>> probs.top1
            2
        """
        return self._cached("top1", lambda: int(self.data.argmax()))

    @property
    def top5(self):
        """
        Returns the indices of the top 5 class probabilities.

        Returns:
            (List[int]): Indices of the most probable classes sorted by descending probability. The list holds
                fewer than 5 entries when there are fewer than 5 classes.

        Examples:
            >>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]))
            >>> print(probs.top5)
            [4, 3, 2, 1, 0]
        """
        # Negating before argsort yields descending order and works with both torch and numpy.
        return self._cached("top5", lambda: (-self.data).argsort(0)[:5].tolist())

    @property
    def top1conf(self):
        """
        Returns the confidence score of the highest probability class.

        Returns:
            (torch.Tensor | numpy.ndarray): The entry of `data` located at index `top1`.

        Examples:
            >>> probs = Probs(torch.tensor([0.1, 0.3, 0.6]))
            >>> print(f"Top 1 class confidence: {probs.top1conf.item():.4f}")
            Top 1 class confidence: 0.6000
        """
        return self._cached("top1conf", lambda: self.data[self.top1])

    @property
    def top5conf(self):
        """
        Returns confidence scores for the top 5 classification predictions.

        Returns:
            (torch.Tensor | numpy.ndarray): The entries of `data` selected by `top5`, i.e. the confidence scores of
                the most probable classes in descending order of probability.

        Examples:
            >>> probs = Probs(torch.tensor([0.1, 0.3, 0.6]))
            >>> print(probs.top5conf)
            tensor([0.6000, 0.3000, 0.1000])
        """
        return self._cached("top5conf", lambda: self.data[self.top5])
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class OBB(BaseTensor):
    """
    A class for storing and manipulating Oriented Bounding Boxes (OBB).

    This class provides functionality to handle oriented bounding boxes, including conversion between
    different formats, normalization, and access to various properties of the boxes. Derived box formats are
    memoized on the instance that produced them, so no class-level cache keeps an instance alive.

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw OBB tensor containing box coordinates and associated data.
        orig_shape (tuple): Original image size as (height, width).
        is_track (bool): Indicates whether tracking IDs are included in the box data.
        xywhr (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height, rotation] format.
        conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
        cls (torch.Tensor | numpy.ndarray): Class labels for each box.
        id (torch.Tensor | numpy.ndarray | None): Tracking IDs for each box, if available.
        xyxyxyxy (torch.Tensor | numpy.ndarray): Boxes as 4 corner points [x1, y1, x2, y2, x3, y3, x4, y4].
        xyxyxyxyn (torch.Tensor | numpy.ndarray): Corner points normalized relative to orig_shape.
        xyxy (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format.

    Methods:
        cpu(): Returns a copy of the OBB object with all tensors on CPU memory.
        numpy(): Returns a copy of the OBB object with all tensors as numpy arrays.
        cuda(): Returns a copy of the OBB object with all tensors on GPU memory.
        to(*args, **kwargs): Returns a copy of the OBB object with tensors on specified device and dtype.

    Examples:
        >>> boxes = torch.tensor([[100, 50, 150, 100, 30, 0.9, 0]])  # xywhr, conf, cls
        >>> obb = OBB(boxes, orig_shape=(480, 640))
        >>> print(obb.xyxyxyxy)
        >>> print(obb.conf)
        >>> print(obb.cls)
    """

    def __init__(self, boxes, orig_shape) -> None:
        """
        Initialize an OBB (Oriented Bounding Box) instance with oriented bounding box data and original image shape.

        Args:
            boxes (torch.Tensor | numpy.ndarray): Detection boxes with shape (num_boxes, 7) or (num_boxes, 8); a 1D
                input is treated as a single box. The first five columns are xywhr, the last two columns contain
                confidence and class values, and with 8 columns the third last column contains track IDs.
            orig_shape (Tuple[int, int]): Original image size, in the format (height, width).

        Attributes:
            data (torch.Tensor | numpy.ndarray): The raw OBB tensor.
            orig_shape (Tuple[int, int]): The original image shape.
            is_track (bool): Whether the boxes include tracking IDs.

        Raises:
            AssertionError: If the number of values per box is not 7 or 8.

        Examples:
            >>> import torch
            >>> boxes = torch.rand(3, 7)  # 3 boxes with 7 values each
            >>> orig_shape = (640, 480)
            >>> obb = OBB(boxes, orig_shape)
            >>> print(obb.xywhr)  # Access the boxes in xywhr format
        """
        if boxes.ndim == 1:
            boxes = boxes[None, :]  # promote a single box to a batch of one
        n = boxes.shape[-1]
        # Kept as an assert because AssertionError is the documented failure mode of this constructor.
        assert n in {7, 8}, f"expected 7 or 8 values but got {n}"  # xywh, rotation, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = n == 8
        # NOTE(review): presumably already set by BaseTensor.__init__; retained to preserve behavior.
        self.orig_shape = orig_shape

    def _cached(self, key, compute):
        """
        Return the value stored under `key` for this instance, computing it with `compute()` on first use.

        The values live in a dict inside the instance's own `__dict__`, so they are released together with the
        instance. A class-level `lru_cache` keyed on `self` would instead retain recently queried instances and
        would be evicted as soon as more instances than its `maxsize` are in use.

        Args:
            key (str): Name identifying the memoized value.
            compute (Callable[[], Any]): Zero-argument callable producing the value.

        Returns:
            (Any): The memoized result of `compute()`.
        """
        store = vars(self).setdefault("_obb_cache", {})
        if key not in store:
            store[key] = compute()
        return store[key]

    @property
    def xywhr(self):
        """
        Returns boxes in [x_center, y_center, width, height, rotation] format.

        Returns:
            (torch.Tensor | numpy.ndarray): The first five columns of `data`, with shape (N, 5) where N is the
                number of boxes.

        Examples:
            >>> obb = OBB(torch.rand(3, 7), orig_shape=(640, 480))
            >>> print(obb.xywhr.shape)
            torch.Size([3, 5])
        """
        return self.data[:, :5]

    @property
    def conf(self):
        """
        Returns the confidence scores for Oriented Bounding Boxes (OBBs).

        Returns:
            (torch.Tensor | numpy.ndarray): The second last column of `data`, with shape (N,) for N detections.

        Examples:
            >>> obb = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640))
            >>> print(obb.conf)
            tensor([0.9000])
        """
        return self.data[:, -2]

    @property
    def cls(self):
        """
        Returns the class values of the oriented bounding boxes.

        Returns:
            (torch.Tensor | numpy.ndarray): The last column of `data`, with shape (N,) where N is the number of
                boxes.

        Examples:
            >>> obb = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640))
            >>> print(obb.cls)
            tensor([0.])
        """
        return self.data[:, -1]

    @property
    def id(self):
        """
        Returns the tracking IDs of the oriented bounding boxes (if available).

        Returns:
            (torch.Tensor | numpy.ndarray | None): The third last column of `data` when the boxes carry 8 values
                each, otherwise None.

        Examples:
            >>> tracked = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 7, 0.9, 0]]), orig_shape=(640, 640))
            >>> print(tracked.id)
            tensor([7.])
            >>> untracked = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640))
            >>> print(untracked.id)
            None
        """
        return self.data[:, -3] if self.is_track else None

    @property
    def xyxyxyxy(self):
        """
        Converts OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.

        Returns:
            (torch.Tensor | numpy.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is
                the number of boxes. Each box is represented by 4 corner points (x, y); the corner order is the one
                produced by `ops.xywhr2xyxyxyxy`.

        Examples:
            >>> obb = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640))
            >>> xyxyxyxy = obb.xyxyxyxy
            >>> print(xyxyxyxy.shape)
            torch.Size([1, 4, 2])
        """
        return self._cached("xyxyxyxy", lambda: ops.xywhr2xyxyxyxy(self.xywhr))

    def _normalized_corners(self):
        """Return a copy of `xyxyxyxy` with x divided by the image width and y by the image height."""
        corners = self.xyxyxyxy
        # Copy first so the in-place division below never touches the memoized absolute coordinates.
        normalized = corners.clone() if isinstance(corners, torch.Tensor) else np.copy(corners)
        normalized[..., 0] /= self.orig_shape[1]
        normalized[..., 1] /= self.orig_shape[0]
        return normalized

    @property
    def xyxyxyxyn(self):
        """
        Converts rotated bounding boxes to normalized xyxyxyxy format.

        Returns:
            (torch.Tensor | numpy.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2),
                where N is the number of boxes. Each box is represented by 4 points (x, y), with x divided by
                `orig_shape[1]` (width) and y divided by `orig_shape[0]` (height).

        Examples:
            >>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480))  # 10 random OBBs
            >>> normalized_boxes = obb.xyxyxyxyn
            >>> print(normalized_boxes.shape)
            torch.Size([10, 4, 2])
        """
        return self._cached("xyxyxyxyn", self._normalized_corners)

    def _enclosing_xyxy(self):
        """Return the per-box min/max of the corner coordinates stacked as [x1, y1, x2, y2]."""
        corners = self.xyxyxyxy
        x, y = corners[..., 0], corners[..., 1]
        if isinstance(x, torch.Tensor):
            return torch.stack([x.amin(1), y.amin(1), x.amax(1), y.amax(1)], -1)
        return np.stack([x.min(1), y.min(1), x.max(1), y.max(1)], -1)

    @property
    def xyxy(self):
        """
        Converts oriented bounding boxes (OBB) to axis-aligned bounding boxes in xyxy format.

        This property takes the minimum and maximum x and y over the four corners of each oriented box, which gives
        the axis-aligned rectangle enclosing it in xyxy format (x1, y1, x2, y2). This is useful for operations that
        require axis-aligned bounding boxes, such as IoU calculation with non-rotated boxes.

        Returns:
            (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N
                is the number of boxes. Each row contains [x1, y1, x2, y2] coordinates.

        Examples:
            >>> import torch
            >>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480))
            >>> print(obb.xyxy.shape)
            torch.Size([10, 4])

        Notes:
            - The result encloses the OBB; it is an approximation of the rotated box, not an equivalent.
            - The value is computed once per instance and reused on repeated access.
        """
        return self._cached("xyxy", self._enclosing_xyxy)
|