check.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import os
  2. import argparse
  3. from pathlib import Path
  4. def sync_folders(images_dir, labels_dir, dry_run=False):
  5. images_dir = Path(images_dir)
  6. labels_dir = Path(labels_dir)
  7. if not images_dir.exists():
  8. raise FileNotFoundError(f"Images directory not found: {images_dir}")
  9. if not labels_dir.exists():
  10. raise FileNotFoundError(f"Labels directory not found: {labels_dir}")
  11. # 支持的图像扩展名(可根据需要修改)
  12. IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
  13. # 获取所有图像文件(不含扩展名)和标签文件(不含扩展名)
  14. image_files = {
  15. f.stem: f for f in images_dir.iterdir()
  16. if f.is_file() and f.suffix.lower() in IMG_EXTENSIONS
  17. }
  18. label_files = {
  19. f.stem: f for f in labels_dir.iterdir()
  20. if f.is_file() and f.suffix.lower() == '.txt'
  21. }
  22. image_names = set(image_files.keys())
  23. label_names = set(label_files.keys())
  24. # 找出不匹配的部分
  25. images_only = image_names - label_names # 有图无标签
  26. labels_only = label_names - image_names # 有标签无图
  27. print(f"总图像数: {len(image_names)}")
  28. print(f"总标签数: {len(label_names)}")
  29. print(f"仅有图像(无对应标签): {len(images_only)} 个")
  30. print(f"仅有标签(无对应图像): {len(labels_only)} 个")
  31. if not images_only and not labels_only:
  32. print("✅ 所有图像和标签已对齐!")
  33. return
  34. to_delete = []
  35. # 默认策略:删除“仅有图像”和“仅有标签”的文件
  36. for name in images_only:
  37. to_delete.append(image_files[name])
  38. for name in labels_only:
  39. to_delete.append(label_files[name])
  40. print("\n将删除以下文件:")
  41. for f in to_delete:
  42. print(f" - {f}")
  43. if dry_run:
  44. print("\n[DRY RUN] 未执行实际删除。")
  45. return
  46. confirm = input("\n⚠️ 确认删除以上文件?(y/N): ").strip().lower()
  47. if confirm == 'y':
  48. for f in to_delete:
  49. try:
  50. f.unlink()
  51. print(f"✅ 已删除: {f}")
  52. except Exception as e:
  53. print(f"❌ 删除失败: {f}, 错误: {e}")
  54. print("清理完成。")
  55. else:
  56. print("操作已取消。")
  57. if __name__ == "__main__":
  58. parser = argparse.ArgumentParser(description="同步 images 和 labels 文件夹,删除不匹配的文件")
  59. parser.add_argument("images_dir", help="图像文件夹路径")
  60. parser.add_argument("labels_dir", help="标签文件夹路径")
  61. parser.add_argument("--dry-run", action="store_true", help="仅显示将要删除的文件,不实际删除")
  62. args = parser.parse_args()
  63. sync_folders(args.images_dir, args.labels_dir, dry_run=args.dry_run)