# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import ast
import json
import platform
import zipfile
from collections import OrderedDict, namedtuple
from pathlib import Path

import cv2
import numpy as np
import torch
import torch.nn as nn
from PIL import Image

from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, ROOT, yaml_load
from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml
from ultralytics.utils.downloads import attempt_download_asset, is_url


def check_class_names(names):
    """
    Check class names.

    Map imagenet class codes to human-readable names if required. Convert lists to dicts.
    """
    if isinstance(names, list):  # names is a list
        names = dict(enumerate(names))  # convert to dict
    if isinstance(names, dict):
        # Convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True'
        names = {int(k): str(v) for k, v in names.items()}
        n = len(names)
        if max(names.keys()) >= n:
            raise KeyError(
                f"{n}-class dataset requires class indices 0-{n - 1}, but you have invalid class indices "
                f"{min(names.keys())}-{max(names.keys())} defined in your dataset YAML."
            )
        if isinstance(names[0], str) and names[0].startswith("n0"):  # imagenet class codes, i.e. 'n01440764'
            names_map = yaml_load(ROOT / "cfg/datasets/ImageNet.yaml")["map"]  # human-readable names
            names = {k: names_map[v] for k, v in names.items()}
    return names


def default_class_names(data=None):
    """Applies default class names to an input YAML file or returns numerical class names."""
    if data:
        try:
            return yaml_load(check_yaml(data))["names"]
        except Exception:
            pass
    return {i: f"class{i}" for i in range(999)}  # return default if above errors
class AutoBackend(nn.Module):
    """
    Handles dynamic backend selection for running inference using Ultralytics YOLO models.

    The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
    range of formats, each with specific naming conventions as outlined below:

        Supported Formats and Naming Conventions:
            | Format                | File Suffix       |
            | --------------------- | ----------------- |
            | PyTorch               | *.pt              |
            | TorchScript           | *.torchscript     |
            | ONNX Runtime          | *.onnx            |
            | ONNX OpenCV DNN       | *.onnx (dnn=True) |
            | OpenVINO              | *openvino_model/  |
            | CoreML                | *.mlpackage       |
            | TensorRT              | *.engine          |
            | TensorFlow SavedModel | *_saved_model/    |
            | TensorFlow GraphDef   | *.pb              |
            | TensorFlow Lite       | *.tflite          |
            | TensorFlow Edge TPU   | *_edgetpu.tflite  |
            | PaddlePaddle          | *_paddle_model/   |
            | MNN                   | *.mnn             |
            | NCNN                  | *_ncnn_model/     |

    This class offers dynamic backend switching capabilities based on the input model format, making it easier to
    deploy models across various platforms.
    """

    @torch.no_grad()
    def __init__(
        self,
        weights="yolo11n.pt",
        device=torch.device("cpu"),
        dnn=False,
        data=None,
        fp16=False,
        batch=1,
        fuse=True,
        verbose=True,
    ):
        """
        Initialize the AutoBackend for inference.

        Args:
            weights (str | torch.nn.Module): Path to the model weights file or a module instance. Defaults to 'yolo11n.pt'.
            device (torch.device): Device to run the model on. Defaults to CPU.
            dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False.
            data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional.
            fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False.
            batch (int): Batch-size to assume for inference.
            fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True.
            verbose (bool): Enable verbose logging. Defaults to True.
        """
        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        nn_module = isinstance(weights, torch.nn.Module)
        (
            pt,
            jit,
            onnx,
            xml,
            engine,
            coreml,
            saved_model,
            pb,
            tflite,
            edgetpu,
            tfjs,
            paddle,
            mnn,
            ncnn,
            imx,
            triton,
        ) = self._model_type(w)
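        # Note: half precision is only retained for backends known to support it; all other formats run in FP32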
  114. fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16
  115. nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH)
  116. stride = 32 # default stride
  117. model, metadata, task = None, None, None
  118. # Set device
  119. cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA
  120. if cuda and not any([nn_module, pt, jit, engine, onnx, paddle]): # GPU dataloader formats
  121. device = torch.device("cpu")
  122. cuda = False

        # Download if not local
        if not (pt or triton or nn_module):
            w = attempt_download_asset(w)

        # In-memory PyTorch model
        if nn_module:
            model = weights.to(device)
            if fuse:
                model = model.fuse(verbose=verbose)
            if hasattr(model, "kpt_shape"):
                kpt_shape = model.kpt_shape  # pose-only
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, "module") else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
            pt = True
        # PyTorch
        elif pt:
            from ultralytics.nn.tasks import attempt_load_weights

            model = attempt_load_weights(
                weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse
            )
            if hasattr(model, "kpt_shape"):
                kpt_shape = model.kpt_shape  # pose-only
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, "module") else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        # TorchScript
        elif jit:
            LOGGER.info(f"Loading {w} for TorchScript inference...")
            extra_files = {"config.txt": ""}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files["config.txt"]:  # load metadata dict
                metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
        # ONNX OpenCV DNN
        elif dnn:
            LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")
            check_requirements("opencv-python>=4.5.4")
            net = cv2.dnn.readNetFromONNX(w)
        # ONNX Runtime and IMX
        elif onnx or imx:
            LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
            check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
            if IS_RASPBERRYPI or IS_JETSON:
                # Fix 'numpy.linalg._umath_linalg' has no attribute '_ilp64' for TF SavedModel on RPi and Jetson
                check_requirements("numpy==1.23.5")
            import onnxruntime

            providers = ["CPUExecutionProvider"]
            if cuda and "CUDAExecutionProvider" in onnxruntime.get_available_providers():
                providers.insert(0, "CUDAExecutionProvider")
            elif cuda:  # Only log warning if CUDA was requested but unavailable
                LOGGER.warning("WARNING ⚠️ Failed to start ONNX Runtime with CUDA. Using CPU...")
                device = torch.device("cpu")
                cuda = False
            LOGGER.info(f"Using ONNX Runtime {providers[0]}")
            if onnx:
                session = onnxruntime.InferenceSession(w, providers=providers)
            else:
                check_requirements(
                    ["model-compression-toolkit==2.1.1", "sony-custom-layers[torch]==0.2.0", "onnxruntime-extensions"]
                )
                w = next(Path(w).glob("*.onnx"))
                LOGGER.info(f"Loading {w} for ONNX IMX inference...")
                import mct_quantizers as mctq
                from sony_custom_layers.pytorch.object_detection import nms_ort  # noqa

                session = onnxruntime.InferenceSession(
                    w, mctq.get_ort_session_options(), providers=["CPUExecutionProvider"]
                )
                task = "detect"
            output_names = [x.name for x in session.get_outputs()]
            metadata = session.get_modelmeta().custom_metadata_map
            dynamic = isinstance(session.get_outputs()[0].shape[0], str)
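            # For static output shapes, pre-bind device-side output buffers so forward() can call
            # run_with_iobinding() and have ONNX Runtime write results directly into these torch tensors,
            # avoiding extra host/device copies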
            if not dynamic:
                io = session.io_binding()
                bindings = []
                for output in session.get_outputs():
                    y_tensor = torch.empty(output.shape, dtype=torch.float16 if fp16 else torch.float32).to(device)
                    io.bind_output(
                        name=output.name,
                        device_type=device.type,
                        device_id=device.index if cuda else 0,
                        element_type=np.float16 if fp16 else np.float32,
                        shape=tuple(y_tensor.shape),
                        buffer_ptr=y_tensor.data_ptr(),
                    )
                    bindings.append(y_tensor)
        # OpenVINO
        elif xml:
            LOGGER.info(f"Loading {w} for OpenVINO inference...")
            check_requirements("openvino>=2024.0.0")
            import openvino as ov

            core = ov.Core()
            w = Path(w)
            if not w.is_file():  # if not *.xml
                w = next(w.glob("*.xml"))  # get *.xml file from *_openvino_model dir
            ov_model = core.read_model(model=str(w), weights=w.with_suffix(".bin"))
            if ov_model.get_parameters()[0].get_layout().empty:
                ov_model.get_parameters()[0].set_layout(ov.Layout("NCHW"))

            # OpenVINO inference modes are 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT'
            inference_mode = "CUMULATIVE_THROUGHPUT" if batch > 1 else "LATENCY"
            LOGGER.info(f"Using OpenVINO {inference_mode} mode for batch={batch} inference...")
            ov_compiled_model = core.compile_model(
                ov_model,
                device_name="AUTO",  # AUTO selects best available device, do not modify
                config={"PERFORMANCE_HINT": inference_mode},
            )
            input_name = ov_compiled_model.input().get_any_name()
            metadata = w.parent / "metadata.yaml"
        # TensorRT
        elif engine:
            LOGGER.info(f"Loading {w} for TensorRT inference...")
            try:
                import tensorrt as trt  # noqa https://developer.nvidia.com/nvidia-tensorrt-download
            except ImportError:
                if LINUX:
                    check_requirements("tensorrt>7.0.0,!=10.1.0")
                import tensorrt as trt  # noqa
            check_version(trt.__version__, ">=7.0.0", hard=True)
            check_version(trt.__version__, "!=10.1.0", msg="https://github.com/ultralytics/ultralytics/pull/14239")
            if device.type == "cpu":
                device = torch.device("cuda:0")
            Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
            logger = trt.Logger(trt.Logger.INFO)
            # Read file
            with open(w, "rb") as f, trt.Runtime(logger) as runtime:
                try:
                    meta_len = int.from_bytes(f.read(4), byteorder="little")  # read metadata length
                    metadata = json.loads(f.read(meta_len).decode("utf-8"))  # read metadata
                except UnicodeDecodeError:
                    f.seek(0)  # engine file may lack embedded Ultralytics metadata
                model = runtime.deserialize_cuda_engine(f.read())  # read engine

            # Model context
            try:
                context = model.create_execution_context()
            except Exception as e:  # model is None
                LOGGER.error(f"ERROR: TensorRT model exported with a different version than {trt.__version__}\n")
                raise e

            bindings = OrderedDict()
            output_names = []
            fp16 = False  # default updated below
            dynamic = False
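            # TensorRT >= 10 removed the binding-index API (num_bindings, get_binding_*), so tensors are handled
            # by name below; the missing `num_bindings` attribute is used as the version check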
            is_trt10 = not hasattr(model, "num_bindings")
            num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings)
            for i in num:
                if is_trt10:
                    name = model.get_tensor_name(i)
                    dtype = trt.nptype(model.get_tensor_dtype(name))
                    is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
                    if is_input:
                        if -1 in tuple(model.get_tensor_shape(name)):
                            dynamic = True
                            context.set_input_shape(name, tuple(model.get_tensor_profile_shape(name, 0)[1]))
                        if dtype == np.float16:
                            fp16 = True
                    else:
                        output_names.append(name)
                    shape = tuple(context.get_tensor_shape(name))
                else:  # TensorRT < 10.0
                    name = model.get_binding_name(i)
                    dtype = trt.nptype(model.get_binding_dtype(i))
                    is_input = model.binding_is_input(i)
                    if model.binding_is_input(i):
                        if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                            dynamic = True
                            context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[1]))
                        if dtype == np.float16:
                            fp16 = True
                    else:
                        output_names.append(name)
                    shape = tuple(context.get_binding_shape(i))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings["images"].shape[0]  # if dynamic, this is instead max batch size
        # CoreML
        elif coreml:
            LOGGER.info(f"Loading {w} for CoreML inference...")
            import coremltools as ct

            model = ct.models.MLModel(w)
            metadata = dict(model.user_defined_metadata)
        # TF SavedModel
        elif saved_model:
            LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
            import tensorflow as tf

            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
            metadata = Path(w) / "metadata.yaml"
        # TF GraphDef
        elif pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
            import tensorflow as tf

            from ultralytics.engine.exporter import gd_outputs

            def wrap_frozen_graph(gd, inputs, outputs):
                """Wrap frozen graphs for deployment."""
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                ge = x.graph.as_graph_element
                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

            gd = tf.Graph().as_graph_def()  # TF GraphDef
            with open(w, "rb") as f:
                gd.ParseFromString(f.read())
            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
            try:  # find metadata in SavedModel alongside GraphDef
                metadata = next(Path(w).resolve().parent.rglob(f"{Path(w).stem}_saved_model*/metadata.yaml"))
            except StopIteration:
                pass
        # TFLite or TFLite Edge TPU
        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf

                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
                device = device[3:] if str(device).startswith("tpu") else ":0"
                LOGGER.info(f"Loading {w} on device {device[1:]} for TensorFlow Lite Edge TPU inference...")
                delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
                    platform.system()
                ]
                interpreter = Interpreter(
                    model_path=w,
                    experimental_delegates=[load_delegate(delegate, options={"device": device})],
                )
                device = "cpu"  # Required, otherwise PyTorch will try to use the wrong device
            else:  # TFLite
                LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            # Load metadata
            try:
                with zipfile.ZipFile(w, "r") as model:
                    meta_file = model.namelist()[0]
                    metadata = ast.literal_eval(model.read(meta_file).decode("utf-8"))
            except zipfile.BadZipFile:
                pass
        # TF.js
        elif tfjs:
            raise NotImplementedError("YOLOv8 TF.js inference is not currently supported.")
        # PaddlePaddle
        elif paddle:
            LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle")
            import paddle.inference as pdi  # noqa

            w = Path(w)
            if not w.is_file():  # if not *.pdmodel
                w = next(w.rglob("*.pdmodel"))  # get *.pdmodel file from *_paddle_model dir
            config = pdi.Config(str(w), str(w.with_suffix(".pdiparams")))
            if cuda:
                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
            predictor = pdi.create_predictor(config)
            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
            output_names = predictor.get_output_names()
            metadata = w.parents[1] / "metadata.yaml"
        # MNN
        elif mnn:
            LOGGER.info(f"Loading {w} for MNN inference...")
            check_requirements("MNN")  # requires MNN
            import os

            import MNN

            config = {"precision": "low", "backend": "CPU", "numThread": (os.cpu_count() + 1) // 2}
            rt = MNN.nn.create_runtime_manager((config,))
            net = MNN.nn.load_module_from_file(w, [], [], runtime_manager=rt, rearrange=True)

            def torch_to_mnn(x):
                return MNN.expr.const(x.data_ptr(), x.shape)

            metadata = json.loads(net.get_info()["bizCode"])
        # NCNN
        elif ncnn:
            LOGGER.info(f"Loading {w} for NCNN inference...")
            check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn")  # requires NCNN
            import ncnn as pyncnn

            net = pyncnn.Net()
            net.opt.use_vulkan_compute = cuda
            w = Path(w)
            if not w.is_file():  # if not *.param
                w = next(w.glob("*.param"))  # get *.param file from *_ncnn_model dir
            net.load_param(str(w))
            net.load_model(str(w.with_suffix(".bin")))
            metadata = w.parent / "metadata.yaml"
        # NVIDIA Triton Inference Server
        elif triton:
            check_requirements("tritonclient[all]")
            from ultralytics.utils.triton import TritonRemoteModel

            model = TritonRemoteModel(w)
            metadata = model.metadata
        # Any other format (unsupported)
        else:
            from ultralytics.engine.exporter import export_formats

            raise TypeError(
                f"model='{w}' is not a supported model format. Ultralytics supports: {export_formats()['Format']}\n"
                f"See https://docs.ultralytics.com/modes/predict for help."
            )

        # Load external metadata YAML
        if isinstance(metadata, (str, Path)) and Path(metadata).exists():
            metadata = yaml_load(metadata)
        if metadata and isinstance(metadata, dict):
            for k, v in metadata.items():
                if k in {"stride", "batch"}:
                    metadata[k] = int(v)
                elif k in {"imgsz", "names", "kpt_shape"} and isinstance(v, str):
                    metadata[k] = eval(v)
            stride = metadata["stride"]
            task = metadata["task"]
            batch = metadata["batch"]
            imgsz = metadata["imgsz"]
            names = metadata["names"]
            kpt_shape = metadata.get("kpt_shape")
        elif not (pt or triton or nn_module):
            LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'")

        # Check names
        if "names" not in locals():  # names missing
            names = default_class_names(data)
        names = check_class_names(names)

        # Disable gradients
        if pt:
            for p in model.parameters():
                p.requires_grad = False

        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False, embed=None):
        """
        Runs inference on the YOLOv8 MultiBackend model.

        Args:
            im (torch.Tensor): The image tensor to perform inference on.
            augment (bool): Whether to perform data augmentation during inference. Defaults to False.
            visualize (bool): Whether to visualize the output predictions. Defaults to False.
            embed (list, optional): A list of feature vectors/embeddings to return.

        Returns:
            (tuple): Tuple containing the raw output tensor, and processed output for visualization (if visualize=True).
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)
        # PyTorch
        if self.pt or self.nn_module:
            y = self.model(im, augment=augment, visualize=visualize, embed=embed)
        # TorchScript
        elif self.jit:
            y = self.model(im)
        # ONNX OpenCV DNN
        elif self.dnn:
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        # ONNX Runtime
        elif self.onnx or self.imx:
            if self.dynamic:
                im = im.cpu().numpy()  # torch to numpy
                y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
            else:
                if not self.cuda:
                    im = im.cpu()
                self.io.bind_input(
                    name="images",
                    device_type=im.device.type,
                    device_id=im.device.index if im.device.type == "cuda" else 0,
                    element_type=np.float16 if self.fp16 else np.float32,
                    shape=tuple(im.shape),
                    buffer_ptr=im.data_ptr(),
                )
                self.session.run_with_iobinding(self.io)
                y = self.bindings
            if self.imx:
                # boxes, conf, cls
                y = np.concatenate([y[0], y[1][:, :, None], y[2][:, :, None]], axis=-1)
        # OpenVINO
        elif self.xml:
            im = im.cpu().numpy()  # FP32
            if self.inference_mode in {"THROUGHPUT", "CUMULATIVE_THROUGHPUT"}:  # optimized for larger batch-sizes
                n = im.shape[0]  # number of images in batch
                results = [None] * n  # preallocate list with None to match the number of images

                def callback(request, userdata):
                    """Places result in preallocated list using userdata index."""
                    results[userdata] = request.results

                # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image
                async_queue = self.ov.runtime.AsyncInferQueue(self.ov_compiled_model)
                async_queue.set_callback(callback)
                for i in range(n):
                    # Start async inference with userdata=i to specify the position in results list
                    async_queue.start_async(inputs={self.input_name: im[i : i + 1]}, userdata=i)  # keep image as BCHW
                async_queue.wait_all()  # wait for all inference requests to complete
                y = np.concatenate([list(r.values())[0] for r in results])
            else:  # inference_mode = "LATENCY", optimized for fastest first result at batch-size 1
                y = list(self.ov_compiled_model(im).values())
        # TensorRT
        elif self.engine:
            if self.dynamic and im.shape != self.bindings["images"].shape:
                if self.is_trt10:
                    self.context.set_input_shape("images", im.shape)
                    self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                    for name in self.output_names:
                        self.bindings[name].data.resize_(tuple(self.context.get_tensor_shape(name)))
                else:
                    i = self.model.get_binding_index("images")
                    self.context.set_binding_shape(i, im.shape)
                    self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                    for name in self.output_names:
                        i = self.model.get_binding_index(name)
                        self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings["images"].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs["images"] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        # CoreML
        elif self.coreml:
            im = im[0].cpu().numpy()
            im_pil = Image.fromarray((im * 255).astype("uint8"))
            # im = im.resize((192, 320), Image.BILINEAR)
            y = self.model.predict({"image": im_pil})  # coordinates are xywh normalized
            if "confidence" in y:
                raise TypeError(
                    "Ultralytics only supports inference of non-pipelined CoreML models exported with "
                    f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export."
                )
                # TODO: CoreML NMS inference handling
                # from ultralytics.utils.ops import xywh2xyxy
                # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
                # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            y = list(y.values())
            if len(y) == 2 and len(y[1].shape) != 4:  # segmentation model
                y = list(reversed(y))  # reversed for segmentation models (pred, proto)
        # PaddlePaddle
        elif self.paddle:
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        # MNN
        elif self.mnn:
            input_var = self.torch_to_mnn(im)
            output_var = self.net.onForward([input_var])
            y = [x.read() for x in output_var]
        # NCNN
        elif self.ncnn:
            mat_in = self.pyncnn.Mat(im[0].cpu().numpy())
            with self.net.create_extractor() as ex:
                ex.input(self.net.input_names()[0], mat_in)
                # WARNING: 'output_names' sorted as a temporary fix for https://github.com/pnnx/pnnx/issues/130
                y = [np.array(ex.extract(x)[1])[None] for x in sorted(self.net.output_names())]
        # NVIDIA Triton Inference Server
        elif self.triton:
            im = im.cpu().numpy()  # torch to numpy
            y = self.model(im)
        # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
        else:
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
                if not isinstance(y, list):
                    y = [y]
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                details = self.input_details[0]
                is_int = details["dtype"] in {np.int8, np.int16}  # is TFLite quantized int8 or int16 model
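                # Affine quantization: real_value = scale * (quantized_value - zero_point), so the input is
                # quantized here and outputs are de-quantized with the inverse mapping below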
                if is_int:
                    scale, zero_point = details["quantization"]
                    im = (im / scale + zero_point).astype(details["dtype"])  # de-scale
                self.interpreter.set_tensor(details["index"], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output["index"])
                    if is_int:
                        scale, zero_point = output["quantization"]
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    if x.ndim == 3:  # if task is not classification, excluding masks (ndim=4) as well
                        # Denormalize xywh by image size. See https://github.com/ultralytics/ultralytics/pull/1695
                        # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models
                        if x.shape[-1] == 6:  # end-to-end model
                            x[:, :, [0, 2]] *= w
                            x[:, :, [1, 3]] *= h
                        else:
                            x[:, [0, 2]] *= w
                            x[:, [1, 3]] *= h
                            if self.task == "pose":
                                x[:, 5::3] *= w
                                x[:, 6::3] *= h
                    y.append(x)
            # TF segment fixes: export is reversed vs ONNX export and protos are transposed
            if len(y) == 2:  # segment with (det, proto) output order reversed
                if len(y[1].shape) != 4:
                    y = list(reversed(y))  # should be y = (1, 116, 8400), (1, 160, 160, 32)
                if y[1].shape[-1] == 6:  # end-to-end model
                    y = [y[1]]
                else:
                    y[1] = np.transpose(y[1], (0, 3, 1, 2))  # should be y = (1, 116, 8400), (1, 32, 160, 160)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]

        # for x in y:
        #     print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape)  # debug shapes
        if isinstance(y, (list, tuple)):
            if len(self.names) == 999 and (self.task == "segment" or len(y) == 2):  # segments and names not defined
                nc = y[0].shape[1] - y[1].shape[1] - 4  # y = (1, 32, 160, 160), (1, 116, 8400)
                self.names = {i: f"class{i}" for i in range(nc)}
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
            return self.from_numpy(y)

    def from_numpy(self, x):
        """
        Convert a numpy array to a tensor.

        Args:
            x (np.ndarray): The array to be converted.

        Returns:
            (torch.Tensor): The converted tensor.
        """
        return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        """
        Warm up the model by running one forward pass with a dummy input.

        Args:
            imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width).
        """
        import torchvision  # noqa (import here so torchvision import time not recorded in postprocess time)

        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module
        if any(warmup_types) and (self.device.type != "cpu" or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
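            # Two passes for TorchScript (an assumption: the profiling JIT may re-optimize after the first run);
            # all other backends warm up with a single forward pass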
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p="path/to/model.pt"):
        """
        Takes a path to a model file and returns the model type. Possible types are pt, jit, onnx, xml, engine,
        coreml, saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.

        Args:
            p: Path to the model file. Defaults to path/to/model.pt.

        Examples:
            >>> model = AutoBackend(weights="path/to/model.onnx")
            >>> model_type = model._model_type()  # returns "onnx"
        """
        from ultralytics.engine.exporter import export_formats

        sf = export_formats()["Suffix"]  # export suffixes
        if not is_url(p) and not isinstance(p, str):
            check_suffix(p, sf)  # checks
        name = Path(p).name
        types = [s in name for s in sf]
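        # `types` holds one boolean per export format, in export_formats() row order (hence the fixed indices
        # below, e.g. 5=CoreML, 8=TFLite, 9=Edge TPU)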
        types[5] |= name.endswith(".mlmodel")  # retain support for older Apple CoreML *.mlmodel formats
        types[8] &= not types[9]  # tflite &= not edgetpu
        if any(types):
            triton = False
        else:
            from urllib.parse import urlsplit

            url = urlsplit(p)
            triton = bool(url.netloc) and bool(url.path) and url.scheme in {"http", "grpc"}

        return types + [triton]