12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740 |
- # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
- """
- Ultralytics Results, Boxes and Masks classes for handling inference results.
- Usage: See https://docs.ultralytics.com/modes/predict/
- """
- from copy import deepcopy
- from functools import lru_cache
- from pathlib import Path
- import numpy as np
- import torch
- from ultralytics.data.augment import LetterBox
- from ultralytics.utils import LOGGER, SimpleClass, ops
- from ultralytics.utils.checks import check_requirements
- from ultralytics.utils.plotting import Annotator, colors, save_one_box
- from ultralytics.utils.torch_utils import smart_inference_mode
class BaseTensor(SimpleClass):
    """
    Base tensor class with additional methods for easy manipulation and device handling.

    Attributes:
        data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
        orig_shape (Tuple[int, int]): Original shape of the image, typically in the format (height, width).

    Methods:
        cpu: Return a wrapper whose data lives in CPU memory.
        numpy: Return a wrapper whose data is a numpy array.
        cuda: Return a wrapper whose data lives in GPU memory.
        to: Return a wrapper whose data has the specified device and/or dtype.

    Examples:
        >>> import torch
        >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
        >>> orig_shape = (720, 1280)
        >>> base_tensor = BaseTensor(data, orig_shape)
        >>> cpu_tensor = base_tensor.cpu()
        >>> numpy_array = base_tensor.numpy()
        >>> gpu_tensor = base_tensor.cuda()
    """

    def __init__(self, data, orig_shape) -> None:
        """
        Initialize BaseTensor with prediction data and the original shape of the image.

        Args:
            data (torch.Tensor | np.ndarray): Prediction data such as bounding boxes, masks, or keypoints.
            orig_shape (Tuple[int, int]): Original shape of the image in (height, width) format.

        Raises:
            AssertionError: If `data` is neither a torch.Tensor nor a np.ndarray.

        Examples:
            >>> import torch
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> orig_shape = (720, 1280)
            >>> base_tensor = BaseTensor(data, orig_shape)
        """
        # Kept as an assert so the exception type seen by existing callers does not change.
        assert isinstance(data, (torch.Tensor, np.ndarray)), "data must be torch.Tensor or np.ndarray"
        self.data = data
        self.orig_shape = orig_shape

    @property
    def shape(self):
        """
        Return the shape of the underlying data.

        Returns:
            (Tuple[int, ...]): The value of `self.data.shape`.

        Examples:
            >>> data = torch.rand(100, 4)
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> print(base_tensor.shape)
            (100, 4)
        """
        return self.data.shape

    def cpu(self):
        """
        Return a wrapper whose data is stored in CPU memory.

        Returns:
            (BaseTensor): `self` when the data is already a numpy array, otherwise a new instance of the
                same class wrapping `self.data.cpu()`.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]]).cuda()
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> cpu_tensor = base_tensor.cpu()
            >>> isinstance(cpu_tensor, BaseTensor)
            True
            >>> cpu_tensor.data.device
            device(type='cpu')
        """
        return self if isinstance(self.data, np.ndarray) else self.__class__(self.data.cpu(), self.orig_shape)

    def numpy(self):
        """
        Return a wrapper whose data is a numpy array.

        Returns:
            (BaseTensor): `self` when the data is already a numpy array, otherwise a new instance of the
                same class wrapping the tensor converted to np.ndarray. Use `.data` to get the raw array.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> orig_shape = (720, 1280)
            >>> base_tensor = BaseTensor(data, orig_shape)
            >>> numpy_wrapper = base_tensor.numpy()
            >>> print(type(numpy_wrapper.data))
            <class 'numpy.ndarray'>
        """
        if isinstance(self.data, np.ndarray):
            return self
        # A bare Tensor.numpy() raises for CUDA tensors and for tensors that require grad, so drop
        # autograd tracking and move to host memory first (both are no-ops for plain CPU tensors).
        return self.__class__(self.data.detach().cpu().numpy(), self.orig_shape)

    def cuda(self):
        """
        Return a wrapper whose data is stored in GPU memory.

        Numpy data is first converted with `torch.as_tensor`, so the result always wraps a torch.Tensor.

        Returns:
            (BaseTensor): A new instance of the same class with the data moved to the CUDA device.

        Examples:
            >>> import torch
            >>> from ultralytics.engine.results import BaseTensor
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> gpu_tensor = base_tensor.cuda()
            >>> print(gpu_tensor.data.device)
            cuda:0
        """
        return self.__class__(torch.as_tensor(self.data).cuda(), self.orig_shape)

    def to(self, *args, **kwargs):
        """
        Return a wrapper whose data has the specified device and/or dtype.

        Numpy data is first converted with `torch.as_tensor`.

        Args:
            *args (Any): Positional arguments forwarded to torch.Tensor.to().
            **kwargs (Any): Keyword arguments forwarded to torch.Tensor.to().

        Returns:
            (BaseTensor): A new instance of the same class wrapping the converted tensor.

        Examples:
            >>> base_tensor = BaseTensor(torch.randn(3, 4), orig_shape=(480, 640))
            >>> cuda_tensor = base_tensor.to("cuda")
            >>> float16_tensor = base_tensor.to(dtype=torch.float16)
        """
        return self.__class__(torch.as_tensor(self.data).to(*args, **kwargs), self.orig_shape)

    def __len__(self):  # override len(results)
        """
        Return the length of the underlying data.

        Returns:
            (int): The value of `len(self.data)`, i.e. the size of its first dimension.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> len(base_tensor)
            2
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Return a new wrapper containing the indexed elements of the data.

        Args:
            idx (int | List[int] | torch.Tensor): Index or indices to select from the data.

        Returns:
            (BaseTensor): A new instance of the same class wrapping `self.data[idx]`.

        Examples:
            >>> data = torch.tensor([[1, 2, 3], [4, 5, 6]])
            >>> base_tensor = BaseTensor(data, orig_shape=(720, 1280))
            >>> result = base_tensor[0]  # Select the first row
            >>> print(result.data)
            tensor([1, 2, 3])
        """
        return self.__class__(self.data[idx], self.orig_shape)
- class Results(SimpleClass):
- """
- A class for storing and manipulating inference results.
- This class encapsulates the functionality for handling detection, segmentation, pose estimation,
- and classification results from YOLO models.
- Attributes:
- orig_img (numpy.ndarray): Original image as a numpy array.
- orig_shape (Tuple[int, int]): Original image shape in (height, width) format.
- boxes (Boxes | None): Object containing detection bounding boxes.
- masks (Masks | None): Object containing detection masks.
- probs (Probs | None): Object containing class probabilities for classification tasks.
- keypoints (Keypoints | None): Object containing detected keypoints for each object.
- obb (OBB | None): Object containing oriented bounding boxes.
- speed (Dict[str, float | None]): Dictionary of preprocess, inference, and postprocess speeds.
- names (Dict[int, str]): Dictionary mapping class IDs to class names.
- path (str): Path to the image file.
- _keys (Tuple[str, ...]): Tuple of attribute names for internal use.
- Methods:
- update: Updates object attributes with new detection results.
- cpu: Returns a copy of the Results object with all tensors on CPU memory.
- numpy: Returns a copy of the Results object with all tensors as numpy arrays.
- cuda: Returns a copy of the Results object with all tensors on GPU memory.
- to: Returns a copy of the Results object with tensors on a specified device and dtype.
- new: Returns a new Results object with the same image, path, and names.
- plot: Plots detection results on an input image, returning an annotated image.
- show: Shows annotated results on screen.
- save: Saves annotated results to file.
- verbose: Returns a log string for each task, detailing detections and classifications.
- save_txt: Saves detection results to a text file.
- save_crop: Saves cropped detection images.
- tojson: Converts detection results to JSON format.
- Examples:
- >>> results = model("path/to/image.jpg")
- >>> for result in results:
- ... print(result.boxes) # Print detection boxes
- ... result.show() # Display the annotated image
- ... result.save(filename="result.jpg") # Save annotated image
- """
def __init__(
    self, orig_img, path, names, boxes=None, masks=None, probs=None, keypoints=None, obb=None, speed=None
) -> None:
    """
    Set up a Results container for one image and wrap any raw predictions that were supplied.

    Args:
        orig_img (numpy.ndarray): The original image; its first two shape entries become `orig_shape`.
        path (str): The path to the image file.
        names (Dict): A dictionary of class names.
        boxes (torch.Tensor | None): Bounding box data, wrapped in `Boxes` when given.
        masks (torch.Tensor | None): Mask data, wrapped in `Masks` when given.
        probs (torch.Tensor | None): Class probabilities, wrapped in `Probs` when given.
        keypoints (torch.Tensor | None): Keypoint data, wrapped in `Keypoints` when given.
        obb (torch.Tensor | None): Oriented bounding box data, wrapped in `OBB` when given.
        speed (Dict | None): Preprocess, inference, and postprocess speeds (ms/image). When None, a dict
            with those three keys set to None is used.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> result = results[0]  # Get the first result
        >>> boxes = result.boxes  # Get the boxes for the first result
        >>> masks = result.masks  # Get the masks for the first result

    Notes:
        For the default pose model, keypoint indices for human body pose estimation are:
            0: Nose, 1: Left Eye, 2: Right Eye, 3: Left Ear, 4: Right Ear
            5: Left Shoulder, 6: Right Shoulder, 7: Left Elbow, 8: Right Elbow
            9: Left Wrist, 10: Right Wrist, 11: Left Hip, 12: Right Hip
            13: Left Knee, 14: Right Knee, 15: Left Ankle, 16: Right Ankle
    """
    shape = orig_img.shape[:2]
    self.orig_img = orig_img
    self.orig_shape = shape

    # Each prediction type stays None unless raw data was passed for it.
    self.boxes = None if boxes is None else Boxes(boxes, shape)  # native size boxes
    self.masks = None if masks is None else Masks(masks, shape)  # native size or imgsz masks
    self.probs = None if probs is None else Probs(probs)
    self.keypoints = None if keypoints is None else Keypoints(keypoints, shape)
    self.obb = None if obb is None else OBB(obb, shape)

    if speed is None:
        speed = {"preprocess": None, "inference": None, "postprocess": None}
    self.speed = speed
    self.names = names
    self.path = path
    self.save_dir = None
    # Attribute names visited by __len__ and _apply, in this order.
    self._keys = ("boxes", "masks", "probs", "keypoints", "obb")
def __getitem__(self, idx):
    """
    Select a subset of the predictions stored in this Results object.

    The index is forwarded, via `_apply`, to the `__getitem__` of every prediction attribute that is
    not None.

    Args:
        idx (int | slice): Index or slice to apply to each populated prediction attribute.

    Returns:
        (Results): A new Results object holding the indexed predictions.

    Examples:
        >>> result = model("path/to/image.jpg")[0]
        >>> first_detection = result[0]
        >>> some_detections = result[1:4]
    """
    subset = self._apply("__getitem__", idx)
    return subset
def __len__(self):
    """
    Return the number of detections in the Results object.

    Returns:
        (int): The length of the first attribute in `_keys` order (boxes, masks, probs, keypoints, obb)
            that is not None, or 0 when no prediction attribute is set.

    Examples:
        >>> results = Results(orig_img, path, names, boxes=torch.rand(5, 4))
        >>> len(results)
        5
    """
    for k in self._keys:
        v = getattr(self, k)
        if v is not None:
            return len(v)
    # Without this, the method would return None and len() would raise TypeError.
    return 0
def update(self, boxes=None, masks=None, probs=None, obb=None, keypoints=None):
    """
    Update the Results object with new prediction data.

    Only the arguments that are not None replace the corresponding attribute; the others are left
    untouched. Boxes are passed through `ops.clip_boxes` with the original image shape before wrapping.

    Args:
        boxes (torch.Tensor | None): A tensor of shape (N, 6) containing bounding box coordinates and
            confidence scores. The format is (x1, y1, x2, y2, conf, class).
        masks (torch.Tensor | None): A tensor of shape (N, H, W) containing segmentation masks.
        probs (torch.Tensor | np.ndarray | Probs | None): Class probabilities. Raw tensors/arrays are
            wrapped in `Probs` (as `__init__` does); any other object is stored as given.
        obb (torch.Tensor | None): A tensor of shape (N, 5) containing oriented bounding box coordinates.
        keypoints (torch.Tensor | None): Keypoint data, wrapped in `Keypoints` with the original shape.

    Examples:
        >>> results = model("image.jpg")
        >>> new_boxes = torch.tensor([[100, 100, 200, 200, 0.9, 0]])
        >>> results[0].update(boxes=new_boxes)
    """
    if boxes is not None:
        self.boxes = Boxes(ops.clip_boxes(boxes, self.orig_shape), self.orig_shape)
    if masks is not None:
        self.masks = Masks(masks, self.orig_shape)
    if probs is not None:
        # Storing a raw tensor here would break later `.top5` / `.top1` access in verbose(), plot()
        # and summary(); objects that are already wrapped are kept unchanged for compatibility.
        self.probs = Probs(probs) if isinstance(probs, (torch.Tensor, np.ndarray)) else probs
    if obb is not None:
        self.obb = OBB(obb, self.orig_shape)
    if keypoints is not None:
        self.keypoints = Keypoints(keypoints, self.orig_shape)
def _apply(self, fn, *args, **kwargs):
    """
    Call one named method on every populated prediction attribute and collect the outputs.

    A fresh Results object is obtained from `self.new()`; for each name in `self._keys` whose attribute
    is not None, the method `fn` is looked up on that attribute, called with the given arguments, and
    its return value is stored on the fresh object under the same name. Used by .to(), .cuda(),
    .cpu(), .numpy() and indexing.

    Args:
        fn (str): The name of the method to call on each attribute.
        *args (Any): Positional arguments passed to that method.
        **kwargs (Any): Keyword arguments passed to that method.

    Returns:
        (Results): The new Results object carrying the transformed attributes.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> for result in results:
        ...     result_cuda = result.cuda()
        ...     result_cpu = result.cpu()
    """
    transformed = self.new()
    for key in self._keys:
        attr = getattr(self, key)
        if attr is None:
            continue  # nothing stored for this prediction type
        method = getattr(attr, fn)
        setattr(transformed, key, method(*args, **kwargs))
    return transformed
def cpu(self):
    """
    Return a copy of the Results object with its prediction data in CPU memory.

    Delegates to `_apply("cpu")`, which calls `cpu()` on every prediction attribute that is not None
    and gathers the outputs in a new Results object.

    Returns:
        (Results): A new Results object; the original is not modified.

    Examples:
        >>> results = model("path/to/image.jpg")  # Perform inference
        >>> cpu_result = results[0].cpu()  # Move the first result to CPU
    """
    on_cpu = self._apply("cpu")
    return on_cpu
def numpy(self):
    """
    Return a copy of the Results object with its prediction data as numpy arrays.

    Delegates to `_apply("numpy")`, which calls `numpy()` on every prediction attribute that is not
    None and gathers the outputs in a new Results object.

    Returns:
        (Results): A new Results object; the original is not modified.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> numpy_result = results[0].numpy()
        >>> type(numpy_result.boxes.data)
        <class 'numpy.ndarray'>
    """
    as_numpy = self._apply("numpy")
    return as_numpy
def cuda(self):
    """
    Return a copy of the Results object with its prediction data in GPU memory.

    Delegates to `_apply("cuda")`, which calls `cuda()` on every prediction attribute that is not None
    and gathers the outputs in a new Results object.

    Returns:
        (Results): A new Results object; the original is not modified.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> cuda_results = results[0].cuda()  # Move first result to GPU
        >>> for result in results:
        ...     result_cuda = result.cuda()  # Move each result to GPU
    """
    on_gpu = self._apply("cuda")
    return on_gpu
def to(self, *args, **kwargs):
    """
    Return a copy of the Results object with its prediction data on the given device and/or dtype.

    Delegates to `_apply("to", ...)`, which calls `to(*args, **kwargs)` on every prediction attribute
    that is not None and gathers the outputs in a new Results object.

    Args:
        *args (Any): Positional arguments forwarded to each attribute's `to()` method.
        **kwargs (Any): Keyword arguments forwarded to each attribute's `to()` method.

    Returns:
        (Results): A new Results object; the original is not modified.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> result_cuda = results[0].to("cuda")  # Move first result to GPU
        >>> result_cpu = results[0].to("cpu")  # Move first result to CPU
        >>> result_half = results[0].to(dtype=torch.float16)  # Convert first result to half precision
    """
    converted = self._apply("to", *args, **kwargs)
    return converted
def new(self):
    """
    Build an empty Results object that reuses this one's image, path, names, and speed.

    The four values are passed on by reference (nothing is copied here), and no prediction data is
    carried over, so boxes, masks, probs, keypoints and obb are all None on the returned object.

    Returns:
        (Results): A new Results object without predictions.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> new_result = results[0].new()
    """
    shared = {
        "orig_img": self.orig_img,
        "path": self.path,
        "names": self.names,
        "speed": self.speed,
    }
    return Results(**shared)
def plot(
    self,
    conf=True,
    line_width=None,
    font_size=None,
    font="Arial.ttf",
    pil=False,
    img=None,
    im_gpu=None,
    kpt_radius=5,
    kpt_line=True,
    labels=True,
    boxes=True,
    masks=True,
    probs=True,
    show=False,
    save=False,
    filename=None,
    color_mode="class",
):
    """
    Draw the stored predictions (masks, boxes/OBB, class probabilities, keypoints) onto an image.

    Drawing happens on a deep copy of `img` (or of the original image when `img` is None), in this
    order: masks, boxes, classification text, keypoints. Keypoints are drawn whenever they exist; none
    of the boolean flags disables them.

    Args:
        conf (bool): Whether to plot detection confidence scores.
        line_width (float | None): Line width of bounding boxes, forwarded to Annotator.
        font_size (float | None): Font size for text, forwarded to Annotator.
        font (str): Font to use for text.
        pil (bool): Forwarded to Annotator as its PIL flag; it is forced on when classification
            probabilities are being plotted.
        img (np.ndarray | None): Image to plot on. If None, uses the original image.
        im_gpu (torch.Tensor | None): Normalized image on GPU for faster mask plotting. If None, it is
            built from the annotator's current image.
        kpt_radius (int): Radius of drawn keypoints.
        kpt_line (bool): Whether to draw lines connecting keypoints.
        labels (bool): Whether to plot labels of bounding boxes.
        boxes (bool): Whether to plot bounding boxes.
        masks (bool): Whether to plot masks.
        probs (bool): Whether to plot classification probabilities.
        show (bool): Whether to display the annotated image.
        save (bool): Whether to save the annotated image.
        filename (str | None): Filename to save image if save is True.
        color_mode (str): Color mode, either 'instance' or 'class'. Defaults to 'class'.

    Returns:
        (np.ndarray): The value of `annotator.result()`. NOTE(review): presumably a PIL image instead
            when the PIL flag is on - confirm against Annotator.result().

    Raises:
        AssertionError: If `color_mode` is not 'instance' or 'class'.

    Examples:
        >>> results = model("image.jpg")
        >>> for result in results:
        ...     im = result.plot()
    """
    assert color_mode in {"instance", "class"}, f"Expected color_mode='instance' or 'class', not {color_mode}."
    # A tensor original image is converted to a uint8 HWC numpy image, using only its first batch item.
    if img is None and isinstance(self.orig_img, torch.Tensor):
        img = (self.orig_img[0].detach().permute(1, 2, 0).contiguous() * 255).to(torch.uint8).cpu().numpy()
    names = self.names
    is_obb = self.obb is not None
    # Oriented boxes take precedence over regular boxes when both could apply.
    pred_boxes, show_boxes = self.obb if is_obb else self.boxes, boxes
    pred_masks, show_masks = self.masks, masks
    pred_probs, show_probs = self.probs, probs
    annotator = Annotator(
        deepcopy(self.orig_img if img is None else img),
        line_width,
        font_size,
        font,
        pil or (pred_probs is not None and show_probs),  # Classify tasks default to pil=True
        example=names,
    )
    # Plot Segment results
    if pred_masks and show_masks:
        if im_gpu is None:
            # Resize the current canvas to the mask resolution, then build a CHW float16 tensor in [0, 1]
            # with the channel order flipped.
            img = LetterBox(pred_masks.shape[1:])(image=annotator.result())
            im_gpu = (
                torch.as_tensor(img, dtype=torch.float16, device=pred_masks.data.device)
                .permute(2, 0, 1)
                .flip(0)
                .contiguous()
                / 255
            )
        # Color index per mask: track ids in 'instance' mode when available, class ids in 'class' mode,
        # otherwise a descending running index.
        # NOTE(review): pred_boxes.id is dereferenced without a None check on pred_boxes; this assumes
        # masks always come with boxes - confirm.
        idx = (
            pred_boxes.id
            if pred_boxes.id is not None and color_mode == "instance"
            else pred_boxes.cls
            if pred_boxes and color_mode == "class"
            else reversed(range(len(pred_masks)))
        )
        annotator.masks(pred_masks.data, colors=[colors(x, True) for x in idx], im_gpu=im_gpu)
    # Plot Detect results
    if pred_boxes is not None and show_boxes:
        # Boxes are drawn in reverse order, so the first detection is drawn last (on top).
        for i, d in enumerate(reversed(pred_boxes)):
            c, d_conf, id = int(d.cls), float(d.conf) if conf else None, None if d.id is None else int(d.id.item())
            name = ("" if id is None else f"id:{id} ") + names[c]
            label = (f"{name} {d_conf:.2f}" if conf else name) if labels else None
            box = d.xyxyxyxy.reshape(-1, 4, 2).squeeze() if is_obb else d.xyxy.squeeze()
            annotator.box_label(
                box,
                label,
                # Color key: class id in 'class' mode; otherwise the track id, falling back to the loop index.
                color=colors(
                    c
                    if color_mode == "class"
                    else id
                    if id is not None
                    else i
                    if color_mode == "instance"
                    else None,
                    True,
                ),
                rotated=is_obb,
            )
    # Plot Classify results
    if pred_probs is not None and show_probs:
        text = ",\n".join(f"{names[j] if names else j} {pred_probs.data[j]:.2f}" for j in pred_probs.top5)
        x = round(self.orig_shape[0] * 0.03)  # text offset is 3% of the image height, used for both x and y
        annotator.text([x, x], text, txt_color=(255, 255, 255))  # TODO: allow setting colors
    # Plot Pose results
    if self.keypoints is not None:
        for i, k in enumerate(reversed(self.keypoints.data)):
            annotator.kpts(
                k,
                self.orig_shape,
                radius=kpt_radius,
                kpt_line=kpt_line,
                kpt_color=colors(i, True) if color_mode == "instance" else None,
            )
    # Show results
    if show:
        annotator.show(self.path)
    # Save results
    if save:
        annotator.save(filename)
    return annotator.result()
def show(self, *args, **kwargs):
    """
    Display the image with annotated inference results.

    Thin wrapper that calls `plot()` with `show=True`; the annotated image returned by `plot()` is
    discarded.

    Args:
        *args (Any): Positional arguments forwarded to the `plot()` method.
        **kwargs (Any): Keyword arguments forwarded to the `plot()` method. Passing `show` here raises
            a TypeError because it is already set.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> results[0].show()  # Display the first result
        >>> for result in results:
        ...     result.show()  # Display all results
    """
    self.plot(*args, show=True, **kwargs)
def save(self, filename=None, *args, **kwargs):
    """
    Save the annotated inference results image to a file.

    Thin wrapper that calls `plot()` with `save=True` and the chosen filename.

    Args:
        filename (str | Path | None): The filename to save the annotated image. If empty or None,
            `results_<name>` is used, where `<name>` is the final component of `self.path`.
        *args (Any): Positional arguments forwarded to the `plot` method.
        **kwargs (Any): Keyword arguments forwarded to the `plot` method.

    Returns:
        (str | Path): The filename that was passed to `plot`.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> for result in results:
        ...     result.save("annotated_image.jpg")
        >>> # Or with custom plot arguments
        >>> for result in results:
        ...     result.save("annotated_image.jpg", conf=False, line_width=2)
    """
    target = filename or f"results_{Path(self.path).name}"
    self.plot(*args, save=True, filename=target, **kwargs)
    return target
def verbose(self):
    """
    Build a one-line, human-readable summary of the results.

    Returns:
        (str): The summary text. Classification results contribute the names and two-decimal
            probabilities of `probs.top5`; detection results contribute one "<count> <name>" entry per
            distinct class, with an "s" appended when the count exceeds 1. Every entry is followed by
            ", ".

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> for result in results:
        ...     print(result.verbose())
        2 persons, 1 car, 3 traffic lights,
        dog 0.92, cat 0.78, horse 0.64,

    Notes:
        - When `len(self)` is 0 the method returns "(no detections), ", or an empty string if
          probabilities are present.
        - The returned string is comma-separated and ends with a comma and a space.
    """
    probs = self.probs
    if len(self) == 0:
        return "" if probs is not None else "(no detections), "

    pieces = []
    if probs is not None:
        top = ", ".join(f"{self.names[j]} {probs.data[j]:.2f}" for j in probs.top5)
        pieces.append(f"{top}, ")

    boxes = self.boxes
    if boxes:
        for c in boxes.cls.unique():
            n = (boxes.cls == c).sum()  # detections per class
            plural = "s" * (n > 1)
            pieces.append(f"{n} {self.names[int(c)]}{plural}, ")
    return "".join(pieces)
def save_txt(self, txt_file, save_conf=False):
    """
    Append the results to a text file, one line per detection or per top-5 class.

    Args:
        txt_file (str | Path): Path to the output text file.
        save_conf (bool): Whether to append the confidence score to each detection line.

    Returns:
        (None): Nothing is returned; the file is only written when there is at least one line.

    Examples:
        >>> from ultralytics import YOLO
        >>> model = YOLO("yolo11n.pt")
        >>> results = model("path/to/image.jpg")
        >>> for result in results:
        ...     result.save_txt("output.txt")

    Notes:
        - Classification lines are `confidence class_name` (confidence is always written, regardless
          of `save_conf`).
        - Detection lines start with the class id followed by the normalized box (`xywhn`, or
          `xyxyxyxyn` for OBB). When masks exist, the box values are replaced by the normalized mask
          polygon. Keypoint values, then the confidence (if `save_conf`), then the track id (if any)
          are appended in that order.
        - The parent directory is created if it does not exist.
        - Existing contents of the file are not overwritten; new lines are appended as UTF-8.
    """
    is_obb = self.obb is not None
    boxes = self.obb if is_obb else self.boxes
    masks = self.masks
    probs = self.probs
    kpts = self.keypoints
    texts = []
    if probs is not None:
        # Classify
        texts.extend(f"{probs.data[j]:.2f} {self.names[j]}" for j in probs.top5)
    elif boxes:
        # Detect/segment/pose
        for j, d in enumerate(boxes):
            c, conf, track_id = int(d.cls), float(d.conf), None if d.id is None else int(d.id.item())
            line = (c, *(d.xyxyxyxyn.view(-1) if is_obb else d.xywhn.view(-1)))
            if masks:
                seg = masks[j].xyn[0].copy().reshape(-1)  # reversed mask.xyn, (n,2) to (n*2)
                line = (c, *seg)  # the polygon replaces the box coordinates
            if kpts is not None:
                # Include per-keypoint confidence as a third column only when it is available.
                kpt = torch.cat((kpts[j].xyn, kpts[j].conf[..., None]), 2) if kpts[j].has_visible else kpts[j].xyn
                line += (*kpt.reshape(-1).tolist(),)
            # `(conf,) * save_conf` is an empty tuple when save_conf is False.
            line += (conf,) * save_conf + (() if track_id is None else (track_id,))
            texts.append(("%g " * len(line)).rstrip() % line)
    if texts:
        Path(txt_file).parent.mkdir(parents=True, exist_ok=True)  # make directory
        # Explicit UTF-8 so non-ASCII class names do not depend on the platform's default encoding.
        with open(txt_file, "a", encoding="utf-8") as f:
            f.writelines(text + "\n" for text in texts)
def save_crop(self, save_dir, file_name=Path("im.jpg")):
    """
    Save a cropped image for every detected box.

    Each box is handed to `save_one_box` together with a fresh copy of the original image and a
    target path of the form `save_dir/<class name>/<file_name with a .jpg suffix>`.

    Args:
        save_dir (str | Path): Directory path where cropped images will be saved.
        file_name (str | Path): Base filename for the saved cropped images. Default is Path("im.jpg").

    Notes:
        - Classify and Oriented Bounding Box (OBB) results are not supported: a warning is logged and
          nothing is saved.
        - The original image is copied before each crop so it is not handed to `save_one_box` directly.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> for result in results:
        ...     result.save_crop(save_dir="path/to/crops", file_name="detection")
    """
    if self.probs is not None:
        LOGGER.warning("WARNING ⚠️ Classify task do not support `save_crop`.")
        return
    if self.obb is not None:
        LOGGER.warning("WARNING ⚠️ OBB task do not support `save_crop`.")
        return
    for det in self.boxes:
        xyxy = det.xyxy
        image_copy = self.orig_img.copy()
        crop_path = Path(save_dir) / self.names[int(det.cls)] / Path(file_name).with_suffix(".jpg")
        save_one_box(xyxy, image_copy, file=crop_path, BGR=True)
def summary(self, normalize=False, decimals=5):
    """
    Convert the inference results into a list of plain dictionaries.

    For classification results the list holds a single dictionary describing the top-1 class. Otherwise one
    dictionary is produced per box (regular or oriented), extended with the track id, mask segments and
    keypoints when those are available.

    Args:
        normalize (bool): If True, divide x coordinates by the image width and y coordinates by the image
            height taken from `self.orig_shape`. Defaults to False.
        decimals (int): Number of decimal places used when rounding output values. Defaults to 5.

    Returns:
        (List[Dict]): One dictionary per result. Classification entries contain "name", "class" and
            "confidence". Detection entries contain "name", "class", "confidence" and "box", plus optional
            "track_id", "segments" and "keypoints". The "keypoints" entry holds "x" and "y" lists and, only
            when the keypoint data carries a third column, a "visible" list. An empty list is returned when
            the result holds neither probabilities nor boxes.

    Examples:
        >>> results = model("image.jpg")
        >>> summary = results[0].summary()
        >>> print(summary)
    """
    results = []
    if self.probs is not None:
        class_id = self.probs.top1
        results.append(
            {
                "name": self.names[class_id],
                "class": class_id,
                "confidence": round(self.probs.top1conf.item(), decimals),
            }
        )
        return results

    is_obb = self.obb is not None
    data = self.obb if is_obb else self.boxes
    if data is None:  # no boxes container at all: nothing to summarize
        return results

    # Dividing by (1, 1) keeps pixel coordinates when normalization is off
    h, w = self.orig_shape if normalize else (1, 1)
    for i, row in enumerate(data):  # xyxy, track_id if tracking, conf, class_id
        class_id, conf = int(row.cls), round(row.conf.item(), decimals)
        box = (row.xyxyxyxy if is_obb else row.xyxy).squeeze().reshape(-1, 2).tolist()
        xy = {}
        for j, b in enumerate(box):
            xy[f"x{j + 1}"] = round(b[0] / w, decimals)
            xy[f"y{j + 1}"] = round(b[1] / h, decimals)
        result = {"name": self.names[class_id], "class": class_id, "confidence": conf, "box": xy}
        if data.is_track:
            result["track_id"] = int(row.id.item())  # track ID
        if self.masks:
            result["segments"] = {
                "x": (self.masks.xy[i][:, 0] / w).round(decimals).tolist(),
                "y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(),
            }
        if self.keypoints is not None:
            # Keypoints may be stored as (x, y) or (x, y, conf); the third column is optional
            x, y, *extra = self.keypoints[i].data[0].cpu().unbind(dim=1)  # torch Tensor
            keypoints = {
                "x": (x / w).numpy().round(decimals).tolist(),  # decimals named argument required
                "y": (y / h).numpy().round(decimals).tolist(),
            }
            if extra:
                keypoints["visible"] = extra[0].numpy().round(decimals).tolist()
            result["keypoints"] = keypoints
        results.append(result)

    return results
def to_df(self, normalize=False, decimals=5):
    """
    Build a pandas DataFrame from the summarized results.

    The rows of the frame are exactly the dictionaries returned by `self.summary()`.

    Args:
        normalize (bool): Forwarded to `summary()`; whether box coordinates are divided by the image
            dimensions. Defaults to False.
        decimals (int): Forwarded to `summary()`; number of decimal places for rounding. Defaults to 5.

    Returns:
        (DataFrame): A pandas DataFrame with one row per summarized result.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> df_result = results[0].to_df()
        >>> print(df_result)
    """
    import pandas as pd  # scope for faster 'import ultralytics'

    records = self.summary(normalize=normalize, decimals=decimals)
    return pd.DataFrame(records)
def to_csv(self, normalize=False, decimals=5, *args, **kwargs):
    """
    Serialize the summarized results as CSV.

    The results are first turned into a DataFrame via `self.to_df()`; that frame's `to_csv()` does the
    actual serialization.

    Args:
        normalize (bool): Forwarded to `to_df()`; whether box coordinates are divided by the image
            dimensions. Defaults to False.
        decimals (int): Forwarded to `to_df()`; number of decimal places for rounding. Defaults to 5.
        *args (Any): Positional arguments passed through to pandas.DataFrame.to_csv().
        **kwargs (Any): Keyword arguments passed through to pandas.DataFrame.to_csv().

    Returns:
        (str): Whatever pandas.DataFrame.to_csv() returns for the given arguments (a CSV string when no
            output path is supplied).

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> csv_result = results[0].to_csv()
        >>> print(csv_result)
    """
    frame = self.to_df(normalize=normalize, decimals=decimals)
    return frame.to_csv(*args, **kwargs)
def to_xml(self, normalize=False, decimals=5, *args, **kwargs):
    """
    Serialize the summarized results as XML.

    The 'lxml' requirement is checked first, then the results are converted with `self.to_df()`. An empty
    frame yields a fixed empty-root document; otherwise the frame's `to_xml()` does the serialization.

    Args:
        normalize (bool): Forwarded to `to_df()`; whether box coordinates are divided by the image
            dimensions. Defaults to False.
        decimals (int): Forwarded to `to_df()`; number of decimal places for rounding. Defaults to 5.
        *args (Any): Positional arguments passed through to pandas.DataFrame.to_xml().
        **kwargs (Any): Keyword arguments passed through to pandas.DataFrame.to_xml().

    Returns:
        (str): An XML string describing the results, or an XML document with an empty <root> element when
            there are no results.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> xml_result = results[0].to_xml()
        >>> print(xml_result)
    """
    check_requirements("lxml")
    frame = self.to_df(normalize=normalize, decimals=decimals)
    if frame.empty:
        # No rows to serialize: return a minimal document instead of calling DataFrame.to_xml()
        return '<?xml version="1.0" encoding="utf-8"?>\n<root></root>'
    return frame.to_xml(*args, **kwargs)
def tojson(self, normalize=False, decimals=5):
    """Deprecated alias of to_json(); logs a deprecation warning and delegates to it."""
    LOGGER.warning("WARNING ⚠️ 'result.tojson()' is deprecated, replace with 'result.to_json()'.")
    serialized = self.to_json(normalize, decimals)
    return serialized
def to_json(self, normalize=False, decimals=5):
    """
    Serialize the summarized results as a JSON string.

    The data structure comes from `self.summary()` and is dumped with an indentation of two spaces.

    Args:
        normalize (bool): Forwarded to `summary()`; whether box coordinates are divided by the image
            dimensions. Defaults to False.
        decimals (int): Forwarded to `summary()`; number of decimal places for rounding. Defaults to 5.

    Returns:
        (str): A JSON string containing the serialized results.

    Examples:
        >>> results = model("path/to/image.jpg")
        >>> json_result = results[0].to_json()
        >>> print(json_result)

    Notes:
        - The content of each entry (classification fields, box coordinates, segments, keypoints) is
          decided entirely by `summary()`; this method only performs the JSON encoding.
    """
    import json

    payload = self.summary(normalize=normalize, decimals=decimals)
    return json.dumps(payload, indent=2)
class Boxes(BaseTensor):
    """
    Container for detection boxes with accessors for several coordinate formats.

    Each row of the underlying data holds [x1, y1, x2, y2, (track_id), confidence, class]. The properties
    below slice that data or convert it to other box formats, in pixel or normalized units.

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw box data, one row per box.
        orig_shape (Tuple[int, int]): The original image dimensions (height, width).
        is_track (bool): True when each row has 7 values, i.e. a tracking ID column is present.
        xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
        conf (torch.Tensor | numpy.ndarray): Confidence score of each box.
        cls (torch.Tensor | numpy.ndarray): Class label of each box.
        id (torch.Tensor | numpy.ndarray | None): Tracking ID of each box, or None without tracking.
        xywh (torch.Tensor | numpy.ndarray): Boxes converted by `ops.xyxy2xywh`.
        xyxyn (torch.Tensor | numpy.ndarray): [x1, y1, x2, y2] boxes divided by the image width/height.
        xywhn (torch.Tensor | numpy.ndarray): `ops.xyxy2xywh` boxes divided by the image width/height.

    Examples:
        >>> import torch
        >>> boxes_data = torch.tensor([[100, 50, 150, 100, 0.9, 0], [200, 150, 300, 250, 0.8, 1]])
        >>> orig_shape = (480, 640)  # height, width
        >>> boxes = Boxes(boxes_data, orig_shape)
        >>> print(boxes.xyxy)
        >>> print(boxes.conf)
        >>> print(boxes.cls)
        >>> print(boxes.xywhn)
    """

    def __init__(self, boxes, orig_shape) -> None:
        """
        Store detection boxes together with the original image shape.

        A 1D input is treated as a single box and promoted to a 2D array with one row.

        Args:
            boxes (torch.Tensor | np.ndarray): Box data of shape (num_boxes, 6) or (num_boxes, 7) with columns
                [x1, y1, x2, y2, (optional) track_id, confidence, class].
            orig_shape (Tuple[int, int]): The original image shape as (height, width), used for normalization.

        Raises:
            AssertionError: If a row does not contain exactly 6 or 7 values.

        Examples:
            >>> import torch
            >>> boxes = torch.tensor([[100, 50, 150, 100, 0.9, 0]])
            >>> orig_shape = (480, 640)
            >>> detection_boxes = Boxes(boxes, orig_shape)
            >>> print(detection_boxes.xyxy)
            tensor([[100.,  50., 150., 100.]])
        """
        if boxes.ndim == 1:
            boxes = boxes[None, :]
        num_values = boxes.shape[-1]
        assert num_values in {6, 7}, f"expected 6 or 7 values but got {num_values}"  # xyxy, track_id, conf, cls
        super().__init__(boxes, orig_shape)
        self.is_track = num_values == 7
        self.orig_shape = orig_shape

    @property
    def xyxy(self):
        """
        Return the boxes in [x1, y1, x2, y2] format.

        Returns:
            (torch.Tensor | numpy.ndarray): The first four columns of the box data, shape (n, 4).

        Examples:
            >>> results = model("image.jpg")
            >>> boxes = results[0].boxes
            >>> print(boxes.xyxy)
        """
        coords = self.data[:, :4]
        return coords

    @property
    def conf(self):
        """
        Return the confidence score of each box.

        Returns:
            (torch.Tensor | numpy.ndarray): The second-to-last column of the box data, shape (N,).

        Examples:
            >>> boxes = Boxes(torch.tensor([[10, 20, 30, 40, 0.9, 0]]), orig_shape=(100, 100))
            >>> print(boxes.conf)
            tensor([0.9000])
        """
        scores = self.data[:, -2]
        return scores

    @property
    def cls(self):
        """
        Return the class ID of each box.

        Returns:
            (torch.Tensor | numpy.ndarray): The last column of the box data, shape (N,).

        Examples:
            >>> results = model("image.jpg")
            >>> boxes = results[0].boxes
            >>> print(boxes.cls)  # tensor([0., 2., 1.])
        """
        labels = self.data[:, -1]
        return labels

    @property
    def id(self):
        """
        Return the tracking ID of each box, if tracking data is present.

        Returns:
            (torch.Tensor | numpy.ndarray | None): The third-to-last column of the box data with shape (N,)
                when `is_track` is True, otherwise None.

        Examples:
            >>> results = model.track("path/to/video.mp4")
            >>> for result in results:
            ...     boxes = result.boxes
            ...     if boxes.is_track:
            ...         print(f"Tracking IDs: {boxes.id}")
            ...     else:
            ...         print("Tracking is not enabled for these boxes.")
        """
        if not self.is_track:
            return None
        return self.data[:, -3]

    @property
    @lru_cache(maxsize=2)  # maxsize 1 should suffice
    def xywh(self):
        """
        Return the boxes converted from [x1, y1, x2, y2] with `ops.xyxy2xywh`.

        The result is memoized by the `lru_cache` decorator.

        Returns:
            (torch.Tensor | numpy.ndarray): Converted boxes of shape (N, 4) in pixel units; the class docs
                describe the layout as [x, y, width, height].

        Examples:
            >>> boxes = Boxes(torch.tensor([[100.0, 50.0, 150.0, 100.0, 0.9, 0.0]]), orig_shape=(480, 640))
            >>> print(boxes.xywh)
        """
        corners = self.xyxy
        return ops.xyxy2xywh(corners)

    @property
    @lru_cache(maxsize=2)
    def xyxyn(self):
        """
        Return [x1, y1, x2, y2] boxes normalized by the original image size.

        A copy of the pixel coordinates is made first, so the stored data is not modified. The x columns
        are divided by the image width and the y columns by the image height.

        Returns:
            (torch.Tensor | numpy.ndarray): Normalized boxes of shape (N, 4).

        Examples:
            >>> boxes = Boxes(torch.tensor([[100, 50, 300, 400, 0.9, 0]]), orig_shape=(480, 640))
            >>> print(boxes.xyxyn)
            tensor([[0.1562, 0.1042, 0.4688, 0.8333]])
        """
        corners = self.xyxy
        if isinstance(corners, torch.Tensor):
            scaled = corners.clone()
        else:
            scaled = np.copy(corners)
        scaled[..., [0, 2]] /= self.orig_shape[1]  # x values by width
        scaled[..., [1, 3]] /= self.orig_shape[0]  # y values by height
        return scaled

    @property
    @lru_cache(maxsize=2)
    def xywhn(self):
        """
        Return `ops.xyxy2xywh` boxes normalized by the original image size.

        The conversion is recomputed here rather than taken from the cached `xywh` property, and the result
        is then scaled in place: columns 0 and 2 by the image width, columns 1 and 3 by the image height.

        Returns:
            (torch.Tensor | numpy.ndarray): Normalized boxes of shape (N, 4).

        Examples:
            >>> boxes = Boxes(torch.tensor([[100, 50, 150, 100, 0.9, 0]]), orig_shape=(480, 640))
            >>> print(boxes.xywhn)
            tensor([[0.1953, 0.1562, 0.0781, 0.1042]])
        """
        scaled = ops.xyxy2xywh(self.xyxy)
        scaled[..., [0, 2]] /= self.orig_shape[1]  # x-related values by width
        scaled[..., [1, 3]] /= self.orig_shape[0]  # y-related values by height
        return scaled
class Masks(BaseTensor):
    """
    Container for segmentation masks with accessors for their contour segments.

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw mask data.
        orig_shape (tuple): Original image shape in (height, width) format.
        xy (List[numpy.ndarray]): Segments scaled to the original image, in pixel coordinates.
        xyn (List[numpy.ndarray]): Segments scaled to the original image, in normalized coordinates.

    Examples:
        >>> masks_data = torch.rand(1, 160, 160)
        >>> orig_shape = (720, 1280)
        >>> masks = Masks(masks_data, orig_shape)
        >>> pixel_coords = masks.xy
        >>> normalized_coords = masks.xyn
    """

    def __init__(self, masks, orig_shape) -> None:
        """
        Store segmentation masks together with the original image shape.

        A 2D input is treated as a single mask and promoted to a 3D array with one entry.

        Args:
            masks (torch.Tensor | np.ndarray): Detection masks with shape (num_masks, height, width).
            orig_shape (tuple): The original image shape as (height, width), used for scaling.

        Examples:
            >>> import torch
            >>> from ultralytics.engine.results import Masks
            >>> masks = torch.rand(10, 160, 160)  # 10 masks of 160x160 resolution
            >>> orig_shape = (720, 1280)  # Original image shape
            >>> mask_obj = Masks(masks, orig_shape)
        """
        single_mask = masks.ndim == 2
        if single_mask:
            masks = masks[None, :]
        super().__init__(masks, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """
        Return the mask segments in normalized coordinates.

        Segments are extracted with `ops.masks2segments` and rescaled from the mask resolution to the
        original image with `ops.scale_coords(..., normalize=True)`. The result is memoized by `lru_cache`.

        Returns:
            (List[numpy.ndarray]): One array of segment points per mask.

        Examples:
            >>> results = model("image.jpg")
            >>> masks = results[0].masks
            >>> print(masks.xyn[0])  # Normalized coordinates of the first mask
        """
        mask_shape = self.data.shape[1:]
        normalized = []
        for segment in ops.masks2segments(self.data):
            normalized.append(ops.scale_coords(mask_shape, segment, self.orig_shape, normalize=True))
        return normalized

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """
        Return the mask segments in pixel coordinates of the original image.

        Segments are extracted with `ops.masks2segments` and rescaled from the mask resolution to the
        original image with `ops.scale_coords(..., normalize=False)`. The result is memoized by `lru_cache`.

        Returns:
            (List[numpy.ndarray]): One array of segment points per mask.

        Examples:
            >>> results = model("image.jpg")
            >>> masks = results[0].masks
            >>> print(len(masks.xy))  # Number of masks
            >>> print(masks.xy[0].shape)  # Shape of first mask's coordinates
        """
        mask_shape = self.data.shape[1:]
        scaled = []
        for segment in ops.masks2segments(self.data):
            scaled.append(ops.scale_coords(mask_shape, segment, self.orig_shape, normalize=False))
        return scaled
class Keypoints(BaseTensor):
    """
    Container for detection keypoints with accessors for coordinates and confidences.

    Attributes:
        data (torch.Tensor): The raw keypoint data.
        orig_shape (Tuple[int, int]): The original image dimensions (height, width).
        has_visible (bool): True when the last dimension of the data has 3 entries (x, y, conf).
        xy (torch.Tensor): Keypoint coordinates in [x, y] format.
        xyn (torch.Tensor): [x, y] coordinates divided by the image width/height.
        conf (torch.Tensor | None): Per-keypoint confidence values, or None when not available.

    Examples:
        >>> import torch
        >>> from ultralytics.engine.results import Keypoints
        >>> keypoints_data = torch.rand(1, 17, 3)  # 1 detection, 17 keypoints, (x, y, conf)
        >>> orig_shape = (480, 640)  # Original image shape (height, width)
        >>> keypoints = Keypoints(keypoints_data, orig_shape)
        >>> print(keypoints.xy.shape)  # Access xy coordinates
        >>> print(keypoints.conf)  # Access confidence values
    """

    @smart_inference_mode()  # avoid keypoints < conf in-place error
    def __init__(self, keypoints, orig_shape) -> None:
        """
        Store detection keypoints together with the original image shape.

        A 2D input is treated as a single object and promoted to 3D. When a confidence column is present,
        the x, y coordinates of every keypoint with confidence below 0.5 are set to zero; this assignment is
        performed on the passed-in data itself.

        Args:
            keypoints (torch.Tensor): Keypoint data of shape
                - (num_objects, num_keypoints, 2) for x, y coordinates only, or
                - (num_objects, num_keypoints, 3) for x, y coordinates and confidence scores.
            orig_shape (Tuple[int, int]): The original image dimensions (height, width).

        Examples:
            >>> kpts = torch.rand(1, 17, 3)  # 1 object, 17 keypoints (COCO format), x,y,conf
            >>> orig_shape = (720, 1280)  # Original image height, width
            >>> keypoints = Keypoints(kpts, orig_shape)
        """
        if keypoints.ndim == 2:
            keypoints = keypoints[None, :]
        has_conf_column = keypoints.shape[2] == 3  # x, y, conf
        if has_conf_column:
            hidden = keypoints[..., 2] < 0.5  # points with conf < 0.5 (not visible)
            keypoints[..., :2][hidden] = 0
        super().__init__(keypoints, orig_shape)
        self.has_visible = self.data.shape[-1] == 3

    @property
    @lru_cache(maxsize=1)
    def xy(self):
        """
        Return the x, y coordinates of the keypoints.

        Returns:
            (torch.Tensor): The first two entries of the last data dimension, shape (N, K, 2).

        Examples:
            >>> results = model("image.jpg")
            >>> keypoints = results[0].keypoints
            >>> print(keypoints.xy.shape)  # (N, K, 2)
            >>> print(keypoints.xy[0])  # x, y coordinates of keypoints for first detection

        Notes:
            - If the keypoints were created with confidence values, coordinates of keypoints whose
              confidence was below 0.5 have been zeroed by the constructor.
            - The result is memoized by `lru_cache`.
        """
        coords = self.data[..., :2]
        return coords

    @property
    @lru_cache(maxsize=1)
    def xyn(self):
        """
        Return the keypoint coordinates normalized by the original image size.

        A copy of the pixel coordinates is made first; x is then divided by the image width and y by the
        image height.

        Returns:
            (torch.Tensor | numpy.ndarray): Normalized coordinates of shape (N, K, 2).

        Examples:
            >>> keypoints = Keypoints(torch.rand(1, 17, 2), orig_shape=(480, 640))
            >>> print(keypoints.xyn.shape)
            torch.Size([1, 17, 2])
        """
        coords = self.xy
        if isinstance(coords, torch.Tensor):
            scaled = coords.clone()
        else:
            scaled = np.copy(coords)
        scaled[..., 0] /= self.orig_shape[1]  # x by width
        scaled[..., 1] /= self.orig_shape[0]  # y by height
        return scaled

    @property
    @lru_cache(maxsize=1)
    def conf(self):
        """
        Return the confidence value of each keypoint.

        Returns:
            (torch.Tensor | None): The third entry of the last data dimension when `has_visible` is True,
                otherwise None.

        Examples:
            >>> keypoints = Keypoints(torch.rand(1, 17, 3), orig_shape=(640, 640))  # 1 detection, 17 keypoints
            >>> print(keypoints.conf.shape)  # torch.Size([1, 17])
        """
        if not self.has_visible:
            return None
        return self.data[..., 2]
class Probs(BaseTensor):
    """
    Container for classification probabilities with top-1 / top-5 accessors.

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw classification probabilities.
        orig_shape (tuple | None): The original image shape as (height, width). Not used by this class.
        top1 (int): Index of the class with the highest probability.
        top5 (List[int]): Indices of the (up to) five most probable classes, most probable first.
        top1conf (torch.Tensor | numpy.ndarray): Probability of the top-1 class.
        top5conf (torch.Tensor | numpy.ndarray): Probabilities of the top-5 classes.

    Examples:
        >>> probs = torch.tensor([0.1, 0.3, 0.6])
        >>> p = Probs(probs)
        >>> print(p.top1)
        2
        >>> print(p.top5)
        [2, 1, 0]
        >>> print(p.top1conf)
        tensor(0.6000)
        >>> print(p.top5conf)
        tensor([0.6000, 0.3000, 0.1000])
    """

    def __init__(self, probs, orig_shape=None) -> None:
        """
        Store classification probabilities.

        Args:
            probs (torch.Tensor | np.ndarray): A 1D tensor or array of classification probabilities.
            orig_shape (tuple | None): The original image shape as (height, width). Not used by this class
                but accepted for consistency with the other result containers.

        Examples:
            >>> import torch
            >>> probs = torch.tensor([0.1, 0.3, 0.2, 0.4])
            >>> p = Probs(probs)
            >>> print(p.top1)
            3
            >>> print(p.top1conf)
            tensor(0.4000)
            >>> print(p.top5)
            [3, 1, 2, 0]
        """
        super().__init__(probs, orig_shape)

    @property
    @lru_cache(maxsize=1)
    def top1(self):
        """
        Return the index of the class with the highest probability.

        Returns:
            (int): Index of the most probable class.

        Examples:
            >>> probs = Probs(torch.tensor([0.1, 0.3, 0.6]))
            >>> probs.top1
            2
        """
        best = self.data.argmax()
        return int(best)

    @property
    @lru_cache(maxsize=1)
    def top5(self):
        """
        Return the indices of the five most probable classes.

        Returns:
            (List[int]): Class indices sorted by descending probability, truncated to at most five entries.

        Examples:
            >>> probs = Probs(torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]))
            >>> print(probs.top5)
            [4, 3, 2, 1, 0]
        """
        # Sorting the negated values ascending gives a descending order; works with both torch and numpy.
        descending = (-self.data).argsort(0)
        return descending[:5].tolist()

    @property
    @lru_cache(maxsize=1)
    def top1conf(self):
        """
        Return the probability of the top-1 class.

        Returns:
            (torch.Tensor | numpy.ndarray): The entry of the probability data at index `top1`.

        Examples:
            >>> results = model("image.jpg")  # classify an image
            >>> probs = results[0].probs  # get classification probabilities
            >>> print(f"Top 1 class confidence: {probs.top1conf.item():.4f}")
        """
        best_index = self.top1
        return self.data[best_index]

    @property
    @lru_cache(maxsize=1)
    def top5conf(self):
        """
        Return the probabilities of the top-5 classes.

        Returns:
            (torch.Tensor | numpy.ndarray): The entries of the probability data at the indices given by
                `top5`, in the same order.

        Examples:
            >>> results = model("image.jpg")
            >>> probs = results[0].probs
            >>> print(probs.top5conf)  # Prints confidence scores for top 5 classes
        """
        best_indices = self.top5
        return self.data[best_indices]
- class OBB(BaseTensor):
- """
- A class for storing and manipulating Oriented Bounding Boxes (OBB).
- This class provides functionality to handle oriented bounding boxes, including conversion between
- different formats, normalization, and access to various properties of the boxes.
- Attributes:
- data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data.
- orig_shape (tuple): Original image size as (height, width).
- is_track (bool): Indicates whether tracking IDs are included in the box data.
- xywhr (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height, rotation] format.
- conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
- cls (torch.Tensor | numpy.ndarray): Class labels for each box.
- id (torch.Tensor | numpy.ndarray): Tracking IDs for each box, if available.
- xyxyxyxy (torch.Tensor | numpy.ndarray): Boxes in 8-point [x1, y1, x2, y2, x3, y3, x4, y4] format.
- xyxyxyxyn (torch.Tensor | numpy.ndarray): Normalized 8-point coordinates relative to orig_shape.
- xyxy (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format.
- Methods:
- cpu(): Returns a copy of the OBB object with all tensors on CPU memory.
- numpy(): Returns a copy of the OBB object with all tensors as numpy arrays.
- cuda(): Returns a copy of the OBB object with all tensors on GPU memory.
- to(*args, **kwargs): Returns a copy of the OBB object with tensors on specified device and dtype.
- Examples:
- >>> boxes = torch.tensor([[100, 50, 150, 100, 30, 0.9, 0]]) # xywhr, conf, cls
- >>> obb = OBB(boxes, orig_shape=(480, 640))
- >>> print(obb.xyxyxyxy)
- >>> print(obb.conf)
- >>> print(obb.cls)
- """
def __init__(self, boxes, orig_shape) -> None:
    """
    Store oriented bounding boxes together with the original image shape.

    A 1D input is treated as a single box and promoted to a 2D array with one row.

    Args:
        boxes (torch.Tensor | numpy.ndarray): Box data of shape (num_boxes, 7) or (num_boxes, 8). The first
            five columns are [x_center, y_center, width, height, rotation], the last two are confidence and
            class, and with 8 columns the third-to-last column holds the track ID.
        orig_shape (Tuple[int, int]): Original image size, in the format (height, width).

    Attributes:
        data (torch.Tensor | numpy.ndarray): The raw OBB data.
        orig_shape (Tuple[int, int]): The original image shape.
        is_track (bool): True when each row has 8 values, i.e. a tracking ID column is present.

    Raises:
        AssertionError: If the number of values per box is not 7 or 8.

    Examples:
        >>> import torch
        >>> boxes = torch.rand(3, 7)  # 3 boxes with 7 values each
        >>> orig_shape = (640, 480)
        >>> obb = OBB(boxes, orig_shape)
        >>> print(obb.xywhr)  # Access the boxes in xywhr format
    """
    if boxes.ndim == 1:
        boxes = boxes[None, :]
    num_values = boxes.shape[-1]
    assert num_values in {7, 8}, f"expected 7 or 8 values but got {num_values}"  # xywh, rotation, track_id, conf, cls
    super().__init__(boxes, orig_shape)
    self.is_track = num_values == 8
    self.orig_shape = orig_shape
@property
def xywhr(self):
    """
    Return the box geometry as [x_center, y_center, width, height, rotation].

    Returns:
        (torch.Tensor | numpy.ndarray): The first five columns of `data`, one row per box, i.e. shape (N, 5)
            for N boxes.

    Examples:
        >>> results = model("image.jpg")
        >>> obb = results[0].obb
        >>> xywhr = obb.xywhr
        >>> print(xywhr.shape)
        torch.Size([3, 5])
    """
    geometry = self.data[:, :5]
    return geometry
@property
def conf(self):
    """
    Return the confidence score of every oriented bounding box.

    The scores are read from the second-to-last column of `data`.

    Returns:
        (torch.Tensor | numpy.ndarray): Confidence scores with shape (N,), one per box.

    Examples:
        >>> results = model("image.jpg")
        >>> obb_result = results[0].obb
        >>> confidence_scores = obb_result.conf
        >>> print(confidence_scores)
    """
    scores = self.data[:, -2]
    return scores
@property
def cls(self):
    """
    Return the class value of every oriented bounding box.

    The values are read from the last column of `data`.

    Returns:
        (torch.Tensor | numpy.ndarray): Class values with shape (N,), one per box.

    Examples:
        >>> results = model("image.jpg")
        >>> result = results[0]
        >>> obb = result.obb
        >>> class_values = obb.cls
        >>> print(class_values)
    """
    labels = self.data[:, -1]
    return labels
@property
def id(self):
    """
    Return the tracking ID of every oriented bounding box, when tracking data is present.

    Track IDs live in the third-to-last column of `data` and only exist when `is_track` is True
    (rows with 8 values).

    Returns:
        (torch.Tensor | numpy.ndarray | None): Track IDs with shape (N,), or None when the boxes carry no
            tracking column.

    Examples:
        >>> import torch
        >>> row = [100.0, 100.0, 50.0, 30.0, 0.5, 7.0, 0.9, 0.0]  # xywhr, track_id, conf, cls
        >>> obb = OBB(torch.tensor([row]), orig_shape=(640, 640))
        >>> if obb.id is not None:
        ...     print(f"Tracking IDs: {obb.id}")
    """
    if not self.is_track:
        return None
    return self.data[:, -3]
@property
def xyxyxyxy(self):
    """
    Convert the boxes from xywhr format to 4-corner (xyxyxyxy) format.

    The conversion is delegated to `ops.xywhr2xyxyxyxy` and computed once per instance; later accesses
    return the same stored object.

    Returns:
        (torch.Tensor | numpy.ndarray): Rotated bounding boxes as corner points with shape (N, 4, 2), where N
            is the number of boxes and each box is 4 (x, y) points. The corner ordering is whatever
            `ops.xywhr2xyxyxyxy` produces.

    Examples:
        >>> obb = OBB(torch.tensor([[100, 100, 50, 30, 0.5, 0.9, 0]]), orig_shape=(640, 640))
        >>> xyxyxyxy = obb.xyxyxyxy
        >>> print(xyxyxyxy.shape)
        torch.Size([1, 4, 2])
    """
    # Memoize on the instance itself instead of `functools.lru_cache`: a cache on the method is shared by
    # every instance, keeps strong references to `self` (and its tensors), and with a small maxsize is
    # evicted as soon as a few different instances are accessed.
    memo = self.__dict__
    if "_xyxyxyxy" not in memo:
        memo["_xyxyxyxy"] = ops.xywhr2xyxyxyxy(self.xywhr)
    return memo["_xyxyxyxy"]
@property
def xyxyxyxyn(self):
    """
    Return the 4-corner boxes normalized by the original image size.

    A copy of `xyxyxyxy` is made, x coordinates are divided by the image width (`orig_shape[1]`) and
    y coordinates by the image height (`orig_shape[0]`). The result is computed once per instance; later
    accesses return the same stored object.

    Returns:
        (torch.Tensor | numpy.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape
            (N, 4, 2), where N is the number of boxes and each box is 4 (x, y) points.

    Examples:
        >>> obb = OBB(torch.rand(10, 7), orig_shape=(640, 480))  # 10 random OBBs
        >>> normalized_boxes = obb.xyxyxyxyn
        >>> print(normalized_boxes.shape)
        torch.Size([10, 4, 2])
    """
    # Memoize on the instance itself instead of `functools.lru_cache`: a cache on the method is shared by
    # every instance, keeps strong references to `self` (and its tensors), and with a small maxsize is
    # evicted as soon as a few different instances are accessed.
    memo = self.__dict__
    if "_xyxyxyxyn" not in memo:
        corners = self.xyxyxyxy
        # Copy first so the in-place division below never touches the un-normalized corners.
        scaled = corners.clone() if isinstance(corners, torch.Tensor) else np.copy(corners)
        scaled[..., 0] /= self.orig_shape[1]  # x / width
        scaled[..., 1] /= self.orig_shape[0]  # y / height
        memo["_xyxyxyxyn"] = scaled
    return memo["_xyxyxyxyn"]
@property
def xyxy(self):
    """
    Convert the oriented bounding boxes to axis-aligned bounding boxes in xyxy format.

    For each box, the minimum and maximum x and y over its four corners (`xyxyxyxy`) are taken, giving
    the axis-aligned rectangle that encloses the rotated box. This is useful for operations that require
    axis-aligned boxes, such as IoU calculation with non-rotated boxes.

    Returns:
        (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes with shape (N, 4), where N is the number
            of boxes. Each row contains [x1, y1, x2, y2] = [min x, min y, max x, max y].

    Examples:
        >>> import torch
        >>> from ultralytics import YOLO
        >>> model = YOLO("yolov8n-obb.pt")
        >>> results = model("path/to/image.jpg")
        >>> for result in results:
        ...     obb = result.obb
        ...     if obb is not None:
        ...         xyxy_boxes = obb.xyxy
        ...         print(xyxy_boxes.shape)  # (N, 4)

    Notes:
        - The result is the enclosing axis-aligned rectangle, so it is generally larger than the rotated box.
        - The value is computed once per instance; later accesses return the same stored object.
    """
    # Memoize on the instance itself instead of `functools.lru_cache`: a cache on the method is shared by
    # every instance, keeps strong references to `self` (and its tensors), and with a small maxsize is
    # evicted as soon as a few different instances are accessed.
    memo = self.__dict__
    if "_xyxy" not in memo:
        corners = self.xyxyxyxy
        x = corners[..., 0]
        y = corners[..., 1]
        if isinstance(x, torch.Tensor):
            boxes = torch.stack([x.amin(1), y.amin(1), x.amax(1), y.amax(1)], -1)
        else:
            boxes = np.stack([x.min(1), y.min(1), x.max(1), y.max(1)], -1)
        memo["_xyxy"] = boxes
    return memo["_xyxy"]
|