tuner.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
  2. from ultralytics.cfg import TASK2DATA, TASK2METRIC, get_cfg, get_save_dir
  3. from ultralytics.utils import DEFAULT_CFG, DEFAULT_CFG_DICT, LOGGER, NUM_THREADS, checks
  4. def run_ray_tune(
  5. model,
  6. space: dict = None,
  7. grace_period: int = 10,
  8. gpu_per_trial: int = None,
  9. max_samples: int = 10,
  10. **train_args,
  11. ):
  12. """
  13. Runs hyperparameter tuning using Ray Tune.
  14. Args:
  15. model (YOLO): Model to run the tuner on.
  16. space (dict, optional): The hyperparameter search space. Defaults to None.
  17. grace_period (int, optional): The grace period in epochs of the ASHA scheduler. Defaults to 10.
  18. gpu_per_trial (int, optional): The number of GPUs to allocate per trial. Defaults to None.
  19. max_samples (int, optional): The maximum number of trials to run. Defaults to 10.
  20. train_args (dict, optional): Additional arguments to pass to the `train()` method. Defaults to {}.
  21. Returns:
  22. (dict): A dictionary containing the results of the hyperparameter search.
  23. Example:
  24. ```python
  25. from ultralytics import YOLO
  26. # Load a YOLOv8n model
  27. model = YOLO("yolo11n.pt")
  28. # Start tuning hyperparameters for YOLOv8n training on the COCO8 dataset
  29. result_grid = model.tune(data="coco8.yaml", use_ray=True)
  30. ```
  31. """
  32. LOGGER.info("💡 Learn about RayTune at https://docs.ultralytics.com/integrations/ray-tune")
  33. if train_args is None:
  34. train_args = {}
  35. try:
  36. checks.check_requirements("ray[tune]")
  37. import ray
  38. from ray import tune
  39. from ray.air import RunConfig
  40. from ray.air.integrations.wandb import WandbLoggerCallback
  41. from ray.tune.schedulers import ASHAScheduler
  42. except ImportError:
  43. raise ModuleNotFoundError('Ray Tune required but not found. To install run: pip install "ray[tune]"')
  44. try:
  45. import wandb
  46. assert hasattr(wandb, "__version__")
  47. except (ImportError, AssertionError):
  48. wandb = False
  49. checks.check_version(ray.__version__, ">=2.0.0", "ray")
  50. default_space = {
  51. # 'optimizer': tune.choice(['SGD', 'Adam', 'AdamW', 'NAdam', 'RAdam', 'RMSProp']),
  52. "lr0": tune.uniform(1e-5, 1e-1),
  53. "lrf": tune.uniform(0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
  54. "momentum": tune.uniform(0.6, 0.98), # SGD momentum/Adam beta1
  55. "weight_decay": tune.uniform(0.0, 0.001), # optimizer weight decay 5e-4
  56. "warmup_epochs": tune.uniform(0.0, 5.0), # warmup epochs (fractions ok)
  57. "warmup_momentum": tune.uniform(0.0, 0.95), # warmup initial momentum
  58. "box": tune.uniform(0.02, 0.2), # box loss gain
  59. "cls": tune.uniform(0.2, 4.0), # cls loss gain (scale with pixels)
  60. "hsv_h": tune.uniform(0.0, 0.1), # image HSV-Hue augmentation (fraction)
  61. "hsv_s": tune.uniform(0.0, 0.9), # image HSV-Saturation augmentation (fraction)
  62. "hsv_v": tune.uniform(0.0, 0.9), # image HSV-Value augmentation (fraction)
  63. "degrees": tune.uniform(0.0, 45.0), # image rotation (+/- deg)
  64. "translate": tune.uniform(0.0, 0.9), # image translation (+/- fraction)
  65. "scale": tune.uniform(0.0, 0.9), # image scale (+/- gain)
  66. "shear": tune.uniform(0.0, 10.0), # image shear (+/- deg)
  67. "perspective": tune.uniform(0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
  68. "flipud": tune.uniform(0.0, 1.0), # image flip up-down (probability)
  69. "fliplr": tune.uniform(0.0, 1.0), # image flip left-right (probability)
  70. "bgr": tune.uniform(0.0, 1.0), # image channel BGR (probability)
  71. "mosaic": tune.uniform(0.0, 1.0), # image mixup (probability)
  72. "mixup": tune.uniform(0.0, 1.0), # image mixup (probability)
  73. "copy_paste": tune.uniform(0.0, 1.0), # segment copy-paste (probability)
  74. }
  75. # Put the model in ray store
  76. task = model.task
  77. model_in_store = ray.put(model)
  78. def _tune(config):
  79. """
  80. Trains the YOLO model with the specified hyperparameters and additional arguments.
  81. Args:
  82. config (dict): A dictionary of hyperparameters to use for training.
  83. Returns:
  84. None
  85. """
  86. model_to_train = ray.get(model_in_store) # get the model from ray store for tuning
  87. model_to_train.reset_callbacks()
  88. config.update(train_args)
  89. results = model_to_train.train(**config)
  90. return results.results_dict
  91. # Get search space
  92. if not space:
  93. space = default_space
  94. LOGGER.warning("WARNING ⚠️ search space not provided, using default search space.")
  95. # Get dataset
  96. data = train_args.get("data", TASK2DATA[task])
  97. space["data"] = data
  98. if "data" not in train_args:
  99. LOGGER.warning(f'WARNING ⚠️ data not provided, using default "data={data}".')
  100. # Define the trainable function with allocated resources
  101. trainable_with_resources = tune.with_resources(_tune, {"cpu": NUM_THREADS, "gpu": gpu_per_trial or 0})
  102. # Define the ASHA scheduler for hyperparameter search
  103. asha_scheduler = ASHAScheduler(
  104. time_attr="epoch",
  105. metric=TASK2METRIC[task],
  106. mode="max",
  107. max_t=train_args.get("epochs") or DEFAULT_CFG_DICT["epochs"] or 100,
  108. grace_period=grace_period,
  109. reduction_factor=3,
  110. )
  111. # Define the callbacks for the hyperparameter search
  112. tuner_callbacks = [WandbLoggerCallback(project="YOLOv8-tune")] if wandb else []
  113. # Create the Ray Tune hyperparameter search tuner
  114. tune_dir = get_save_dir(
  115. get_cfg(DEFAULT_CFG, train_args), name=train_args.pop("name", "tune")
  116. ).resolve() # must be absolute dir
  117. tune_dir.mkdir(parents=True, exist_ok=True)
  118. tuner = tune.Tuner(
  119. trainable_with_resources,
  120. param_space=space,
  121. tune_config=tune.TuneConfig(scheduler=asha_scheduler, num_samples=max_samples),
  122. run_config=RunConfig(callbacks=tuner_callbacks, storage_path=tune_dir),
  123. )
  124. # Run the hyperparameter search
  125. tuner.fit()
  126. # Get the results of the hyperparameter search
  127. results = tuner.get_results()
  128. # Shut down Ray to clean up workers
  129. ray.shutdown()
  130. return results