tensorboard.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
  2. from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr
# Import gate for the TensorBoard integration. Any failure inside the `try` (missing package, a failed
# assertion, or the protobuf/tensorflow errors noted below) sets `SummaryWriter = None`, which the rest of
# the module uses as its "integration disabled" flag.
try:
    # WARNING: do not move SummaryWriter import due to protobuf bug https://github.com/ultralytics/ultralytics/pull/4674
    from torch.utils.tensorboard import SummaryWriter

    # The asserts act as feature switches: an AssertionError is caught below and disables the integration
    assert not TESTS_RUNNING  # do not log pytest
    assert SETTINGS["tensorboard"] is True  # verify integration is enabled
    WRITER = None  # TensorBoard SummaryWriter instance
    PREFIX = colorstr("TensorBoard: ")

    # Imports below only required if TensorBoard enabled
    import warnings
    from copy import deepcopy

    from ultralytics.utils.torch_utils import de_parallel, torch

except (ImportError, AssertionError, TypeError, AttributeError):
    # TypeError for handling 'Descriptors cannot not be created directly.' protobuf errors in Windows
    # AttributeError: module 'tensorflow' has no attribute 'io' if 'tensorflow' not installed
    # NOTE: names assigned after the failing statement above (e.g. WRITER, PREFIX) stay undefined in this case
    SummaryWriter = None
  18. def _log_scalars(scalars, step=0):
  19. """Logs scalar values to TensorBoard."""
  20. if WRITER:
  21. for k, v in scalars.items():
  22. WRITER.add_scalar(k, v, step)
  23. def _log_tensorboard_graph(trainer):
  24. """Log model graph to TensorBoard."""
  25. # Input image
  26. imgsz = trainer.args.imgsz
  27. imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz
  28. p = next(trainer.model.parameters()) # for device, type
  29. im = torch.zeros((1, 3, *imgsz), device=p.device, dtype=p.dtype) # input image (must be zeros, not empty)
  30. with warnings.catch_warnings():
  31. warnings.simplefilter("ignore", category=UserWarning) # suppress jit trace warning
  32. warnings.simplefilter("ignore", category=torch.jit.TracerWarning) # suppress jit trace warning
  33. # Try simple method first (YOLO)
  34. try:
  35. trainer.model.eval() # place in .eval() mode to avoid BatchNorm statistics changes
  36. WRITER.add_graph(torch.jit.trace(de_parallel(trainer.model), im, strict=False), [])
  37. LOGGER.info(f"{PREFIX}model graph visualization added ✅")
  38. return
  39. except Exception:
  40. # Fallback to TorchScript export steps (RTDETR)
  41. try:
  42. model = deepcopy(de_parallel(trainer.model))
  43. model.eval()
  44. model = model.fuse(verbose=False)
  45. for m in model.modules():
  46. if hasattr(m, "export"): # Detect, RTDETRDecoder (Segment and Pose use Detect base class)
  47. m.export = True
  48. m.format = "torchscript"
  49. model(im) # dry run
  50. WRITER.add_graph(torch.jit.trace(model, im, strict=False), [])
  51. LOGGER.info(f"{PREFIX}model graph visualization added ✅")
  52. except Exception as e:
  53. LOGGER.warning(f"{PREFIX}WARNING ⚠️ TensorBoard graph visualization failure {e}")
  54. def on_pretrain_routine_start(trainer):
  55. """Initialize TensorBoard logging with SummaryWriter."""
  56. if SummaryWriter:
  57. try:
  58. global WRITER
  59. WRITER = SummaryWriter(str(trainer.save_dir))
  60. LOGGER.info(f"{PREFIX}Start with 'tensorboard --logdir {trainer.save_dir}', view at http://localhost:6006/")
  61. except Exception as e:
  62. LOGGER.warning(f"{PREFIX}WARNING ⚠️ TensorBoard not initialized correctly, not logging this run. {e}")
  63. def on_train_start(trainer):
  64. """Log TensorBoard graph."""
  65. if WRITER:
  66. _log_tensorboard_graph(trainer)
  67. def on_train_epoch_end(trainer):
  68. """Logs scalar statistics at the end of a training epoch."""
  69. _log_scalars(trainer.label_loss_items(trainer.tloss, prefix="train"), trainer.epoch + 1)
  70. _log_scalars(trainer.lr, trainer.epoch + 1)
  71. def on_fit_epoch_end(trainer):
  72. """Logs epoch metrics at end of training epoch."""
  73. _log_scalars(trainer.metrics, trainer.epoch + 1)
  74. callbacks = (
  75. {
  76. "on_pretrain_routine_start": on_pretrain_routine_start,
  77. "on_train_start": on_train_start,
  78. "on_fit_epoch_end": on_fit_epoch_end,
  79. "on_train_epoch_end": on_train_epoch_end,
  80. }
  81. if SummaryWriter
  82. else {}
  83. )