train_world.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109
  1. # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
  2. from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
  3. from ultralytics.data.utils import check_det_dataset
  4. from ultralytics.models.yolo.world import WorldTrainer
  5. from ultralytics.utils import DEFAULT_CFG
  6. from ultralytics.utils.torch_utils import de_parallel
  class WorldTrainerFromScratch(WorldTrainer):
      """
      A class extending the WorldTrainer class for training a world model from scratch on open-set datasets.

      The `data` argument is a dict with `train` and `val` entries. Each entry may hold `yolo_data` (a list of
      detection dataset YAMLs) and, optionally, `grounding_data` (a dict or list of dicts with `img_path` and
      `json_file` keys).

      Example:
          ```python
          from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch
          from ultralytics import YOLOWorld

          data = dict(
              train=dict(
                  yolo_data=["Objects365.yaml"],
                  grounding_data=[
                      dict(
                          img_path="../datasets/flickr30k/images",
                          json_file="../datasets/flickr30k/final_flickr_separateGT_train.json",
                      ),
                      dict(
                          img_path="../datasets/GQA/images",
                          json_file="../datasets/GQA/final_mixed_train_no_coco.json",
                      ),
                  ],
              ),
              val=dict(yolo_data=["lvis.yaml"]),
          )

          model = YOLOWorld("yolov8s-worldv2.yaml")
          model.train(data=data, trainer=WorldTrainerFromScratch)
          ```
      """

      def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
          """
          Initialize a WorldTrainerFromScratch object with given arguments.

          Args:
              cfg: Configuration forwarded unchanged to `WorldTrainer.__init__`. Defaults to `DEFAULT_CFG`.
              overrides (dict, optional): Configuration overrides; `None` is replaced by a fresh empty dict.
              _callbacks: Callbacks forwarded unchanged to `WorldTrainer.__init__`.
          """
          if overrides is None:
              overrides = {}
          super().__init__(cfg, overrides, _callbacks)

      def build_dataset(self, img_path, mode="train", batch=None):
          """
          Build the dataset for the requested mode.

          Args:
              img_path (List[str | dict] | str | dict): For non-train modes, the image path handed straight to
                  `build_yolo_dataset`. For `train`, one or more sources: a `str` is built with
                  `build_yolo_dataset`, a dict with `img_path` and `json_file` keys is built with `build_grounding`.
              mode (str): `train` mode or `val` mode, users are able to customize different augmentations for each mode.
              batch (int, optional): Size of batches, this is for `rect`. Defaults to None.

          Returns:
              The dataset built by `build_yolo_dataset` for non-train modes. For `train`, the single built dataset
              when there is one source, otherwise a `YOLOConcatDataset` wrapping all of them.
          """
          # Grid size: the model's maximum stride (0 when no model is attached), never smaller than 32.
          gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
          if mode != "train":
              return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs)

          # A lone source would otherwise be iterated character-by-character (str) or key-by-key (dict)
          # by the comprehension below, so wrap it in a list first.
          if isinstance(img_path, (str, dict)):
              img_path = [img_path]

          datasets = [
              build_yolo_dataset(self.args, im_path, batch, self.data, stride=gs, multi_modal=True)
              if isinstance(im_path, str)
              else build_grounding(self.args, im_path["img_path"], im_path["json_file"], batch, stride=gs)
              for im_path in img_path
          ]
          return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]

      def get_dataset(self):
          """
          Resolve the train and val sources from `self.args.data` and store the merged result on `self.data`.

          `self.data` is set to a dict with `train` and `val` source lists plus `nc` and `names`, both taken from
          the single validation dataset.

          Returns:
              (tuple): The list of training sources (dataset paths followed by any grounding dicts) and the first
                  validation source.

          Raises:
              AssertionError: If the `train` or `val` entry is missing or empty, if the number of validation
                  `yolo_data` datasets is not exactly one, or if a grounding entry is not a dict.
          """
          final_data = {}
          data_yaml = self.args.data
          assert data_yaml.get("train", False), "train dataset not found"  # object365.yaml
          assert data_yaml.get("val", False), "validation dataset not found"  # lvis.yaml
          # Run every listed `yolo_data` entry of every split through `check_det_dataset`.
          data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
          assert len(data["val"]) == 1, f"Only support validating on 1 dataset for now, but got {len(data['val'])}."
          # Validate on the `minival` split when the dataset's `val` entry mentions lvis.
          val_split = "minival" if "lvis" in data["val"][0]["val"] else "val"
          for d in data["val"]:
              if d.get("minival") is None:  # for lvis dataset
                  continue
              # Anchor the `minival` entry to the dataset root and store it as a string.
              d["minival"] = str(d["path"] / d["minival"])
          for s in ["train", "val"]:
              final_data[s] = [d["train" if s == "train" else val_split] for d in data[s]]
              # save grounding data if there's one
              grounding_data = data_yaml[s].get("grounding_data")
              if grounding_data is None:
                  continue
              grounding_data = grounding_data if isinstance(grounding_data, list) else [grounding_data]
              for g in grounding_data:
                  assert isinstance(g, dict), f"Grounding data should be provided in dict format, but got {type(g)}"
              final_data[s] += grounding_data
          # NOTE: to make training work properly, set `nc` and `names`
          final_data["nc"] = data["val"][0]["nc"]
          final_data["names"] = data["val"][0]["names"]
          self.data = final_data
          return final_data["train"], final_data["val"][0]

      def plot_training_labels(self):
          """DO NOT plot labels."""

      def final_eval(self):
          """
          Performs final evaluation and validation for object detection YOLO-World model.

          Points the validator at the first validation `yolo_data` entry and selects the `minival` split when
          that entry is a string containing "lvis" (otherwise `val`), then defers to the parent implementation.

          Returns:
              Whatever the parent class `final_eval` returns.
          """
          val = self.args.data["val"]["yolo_data"][0]
          self.validator.args.data = val
          self.validator.args.split = "minival" if isinstance(val, str) and "lvis" in val else "val"
          return super().final_eval()