SKU-110K.yaml 2.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
  # SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by Trax Retail
  # Documentation: https://docs.ultralytics.com/datasets/detect/sku-110k/
  # Example usage: yolo train data=SKU-110K.yaml
  # parent
  # ├── ultralytics
  # └── datasets
  #     └── SKU-110K ← downloads here (13.6 GB)
  # Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
  # Dataset locations. train/val/test are image-list files resolved relative to 'path'.
  path: ../datasets/SKU-110K # dataset root dir
  train: train.txt # train images (relative to 'path') 8219 images
  val: val.txt # val images (relative to 'path') 588 images
  test: test.txt # test images (optional) 2936 images

  # Classes (mapping of class index -> class name; entries must be nested under 'names')
  names:
    0: object
  # Download script/URL (optional) ---------------------------------------------------------------------------------------
  # The value below is Python source kept as a YAML block scalar. It reads a `yaml` name that is not
  # defined in the script itself, so whoever executes it must supply the parsed dataset dict under that name.
  download: |
    import shutil
    from pathlib import Path

    import numpy as np
    import pandas as pd
    from tqdm import tqdm

    from ultralytics.utils.downloads import download
    from ultralytics.utils.ops import xyxy2xywh

    # Download
    root = Path(yaml['path'])  # dataset root dir (named `root` so the builtin dir() is not shadowed)
    parent = root.parent  # download dir
    urls = ['http://trax-geometry.s3.amazonaws.com/cvpr_challenge/SKU110K_fixed.tar.gz']
    download(urls, dir=parent)

    # Rename directories
    if root.exists():
        shutil.rmtree(root)  # remove any previous copy so the rename below cannot collide
    (parent / 'SKU110K_fixed').rename(root)  # rename dir
    labels_dir = root / 'labels'
    labels_dir.mkdir(parents=True, exist_ok=True)  # create labels dir

    # Convert labels
    names = 'image', 'x1', 'y1', 'x2', 'y2', 'class', 'image_width', 'image_height'  # column names
    cls = 0  # single-class dataset
    for d in 'annotations_train.csv', 'annotations_val.csv', 'annotations_test.csv':
        df = pd.read_csv(root / 'annotations' / d, names=names)  # annotations

        # Image list for this split, e.g. annotations_train.csv -> train.txt. The prefix is stripped from
        # the file name only, so an 'annotations_' substring elsewhere in the root path is left intact.
        split_file = (root / d.replace('annotations_', '')).with_suffix('.txt')
        with open(split_file, 'w', encoding='utf-8') as f:
            f.writelines(f'./images/{s}\n' for s in np.unique(df['image'].to_numpy()))

        # Group the rows once per split instead of masking the whole table for every image.
        for im, rows in tqdm(df.groupby('image'), desc=f'Converting {root / d}'):
            xyxy = rows[['x1', 'y1', 'x2', 'y2']].to_numpy(dtype=float)
            # Per-row divisor (w, h, w, h) so x coordinates scale by width and y coordinates by height.
            size = rows[['image_width', 'image_height', 'image_width', 'image_height']].to_numpy(dtype=float)
            xywh = xyxy2xywh(xyxy / size)  # one converted box per annotation row
            # Append mode is kept: an image listed in more than one CSV accumulates all of its boxes.
            with open((labels_dir / im).with_suffix('.txt'), 'a', encoding='utf-8') as f:
                f.writelines(f"{cls} {b[0]:.5f} {b[1]:.5f} {b[2]:.5f} {b[3]:.5f}\n" for b in xywh)  # write labels