hub.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
  2. import json
  3. from time import time
  4. from ultralytics.hub import HUB_WEB_ROOT, PREFIX, HUBTrainingSession, events
  5. from ultralytics.utils import LOGGER, RANK, SETTINGS
  6. def on_pretrain_routine_start(trainer):
  7. """Create a remote Ultralytics HUB session to log local model training."""
  8. if RANK in {-1, 0} and SETTINGS["hub"] is True and SETTINGS["api_key"] and trainer.hub_session is None:
  9. trainer.hub_session = HUBTrainingSession.create_session(trainer.args.model, trainer.args)
  10. def on_pretrain_routine_end(trainer):
  11. """Logs info before starting timer for upload rate limit."""
  12. if session := getattr(trainer, "hub_session", None):
  13. # Start timer for upload rate limit
  14. session.timers = {"metrics": time(), "ckpt": time()} # start timer on session.rate_limit
  15. def on_fit_epoch_end(trainer):
  16. """Uploads training progress metrics at the end of each epoch."""
  17. if session := getattr(trainer, "hub_session", None):
  18. # Upload metrics after val end
  19. all_plots = {
  20. **trainer.label_loss_items(trainer.tloss, prefix="train"),
  21. **trainer.metrics,
  22. }
  23. if trainer.epoch == 0:
  24. from ultralytics.utils.torch_utils import model_info_for_loggers
  25. all_plots = {**all_plots, **model_info_for_loggers(trainer)}
  26. session.metrics_queue[trainer.epoch] = json.dumps(all_plots)
  27. # If any metrics fail to upload, add them to the queue to attempt uploading again.
  28. if session.metrics_upload_failed_queue:
  29. session.metrics_queue.update(session.metrics_upload_failed_queue)
  30. if time() - session.timers["metrics"] > session.rate_limits["metrics"]:
  31. session.upload_metrics()
  32. session.timers["metrics"] = time() # reset timer
  33. session.metrics_queue = {} # reset queue
  34. def on_model_save(trainer):
  35. """Saves checkpoints to Ultralytics HUB with rate limiting."""
  36. if session := getattr(trainer, "hub_session", None):
  37. # Upload checkpoints with rate limiting
  38. is_best = trainer.best_fitness == trainer.fitness
  39. if time() - session.timers["ckpt"] > session.rate_limits["ckpt"]:
  40. LOGGER.info(f"{PREFIX}Uploading checkpoint {HUB_WEB_ROOT}/models/{session.model.id}")
  41. session.upload_model(trainer.epoch, trainer.last, is_best)
  42. session.timers["ckpt"] = time() # reset timer
  43. def on_train_end(trainer):
  44. """Upload final model and metrics to Ultralytics HUB at the end of training."""
  45. if session := getattr(trainer, "hub_session", None):
  46. # Upload final model and metrics with exponential standoff
  47. LOGGER.info(f"{PREFIX}Syncing final model...")
  48. session.upload_model(
  49. trainer.epoch,
  50. trainer.best,
  51. map=trainer.metrics.get("metrics/mAP50-95(B)", 0),
  52. final=True,
  53. )
  54. session.alive = False # stop heartbeats
  55. LOGGER.info(f"{PREFIX}Done ✅\n{PREFIX}View model at {session.model_url} 🚀")
  56. def on_train_start(trainer):
  57. """Run events on train start."""
  58. events(trainer.args)
  59. def on_val_start(validator):
  60. """Runs events on validation start."""
  61. events(validator.args)
  62. def on_predict_start(predictor):
  63. """Run events on predict start."""
  64. events(predictor.args)
  65. def on_export_start(exporter):
  66. """Run events on export start."""
  67. events(exporter.args)
  68. callbacks = (
  69. {
  70. "on_pretrain_routine_start": on_pretrain_routine_start,
  71. "on_pretrain_routine_end": on_pretrain_routine_end,
  72. "on_fit_epoch_end": on_fit_epoch_end,
  73. "on_model_save": on_model_save,
  74. "on_train_end": on_train_end,
  75. "on_train_start": on_train_start,
  76. "on_val_start": on_val_start,
  77. "on_predict_start": on_predict_start,
  78. "on_export_start": on_export_start,
  79. }
  80. if SETTINGS["hub"] is True
  81. else {}
  82. ) # verify enabled