| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- import os
- import argparse
- from pathlib import Path
def _group_by_stem(directory, suffixes):
    """Map each file stem in *directory* to every matching file carrying it.

    Only regular files whose lower-cased suffix is in *suffixes* are kept.
    Several files can share one stem (``a.jpg`` and ``a.png``), so each
    value is a list of paths rather than a single path.
    """
    grouped = {}
    for entry in directory.iterdir():
        if entry.is_file() and entry.suffix.lower() in suffixes:
            grouped.setdefault(entry.stem, []).append(entry)
    return grouped


def sync_folders(images_dir, labels_dir, dry_run=False):
    """Delete images without a label file and label files without an image.

    Files are paired by stem (file name without its suffix). Images are
    files in ``images_dir`` whose suffix is one of the supported image
    extensions; labels are ``.txt`` files in ``labels_dir``. Neither
    directory is scanned recursively. A summary and the list of files to
    remove are printed; unless ``dry_run`` is set, the user is asked to
    confirm on stdin before anything is deleted.

    Args:
        images_dir: Path (str or path-like) of the image directory.
        labels_dir: Path (str or path-like) of the label directory.
        dry_run: When True, only print what would be deleted.

    Raises:
        FileNotFoundError: If either directory does not exist.
        NotADirectoryError: If either path exists but is not a directory.
    """
    images_dir = Path(images_dir)
    labels_dir = Path(labels_dir)
    if not images_dir.exists():
        raise FileNotFoundError(f"Images directory not found: {images_dir}")
    if not labels_dir.exists():
        raise FileNotFoundError(f"Labels directory not found: {labels_dir}")
    # Fail with a clear message instead of letting iterdir() raise later.
    if not images_dir.is_dir():
        raise NotADirectoryError(f"Images path is not a directory: {images_dir}")
    if not labels_dir.is_dir():
        raise NotADirectoryError(f"Labels path is not a directory: {labels_dir}")

    # Supported image extensions (adjust as needed).
    IMG_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}

    # Group by stem so that files sharing a stem (e.g. a.jpg and a.png)
    # are all tracked; a plain stem -> path dict would silently keep only
    # one of them and leave the other behind.
    image_files = _group_by_stem(images_dir, IMG_EXTENSIONS)
    label_files = _group_by_stem(labels_dir, {'.txt'})

    # Stems present on one side only.
    images_only = image_files.keys() - label_files.keys()  # image, no label
    labels_only = label_files.keys() - image_files.keys()  # label, no image

    # Sorted so the listing and the deletion order are deterministic.
    orphan_images = sorted(p for name in images_only for p in image_files[name])
    orphan_labels = sorted(p for name in labels_only for p in label_files[name])

    print(f"总图像数: {sum(len(paths) for paths in image_files.values())}")
    print(f"总标签数: {sum(len(paths) for paths in label_files.values())}")
    print(f"仅有图像(无对应标签): {len(orphan_images)} 个")
    print(f"仅有标签(无对应图像): {len(orphan_labels)} 个")

    if not orphan_images and not orphan_labels:
        print("✅ 所有图像和标签已对齐!")
        return

    # Default policy: remove both image-only and label-only files.
    to_delete = orphan_images + orphan_labels

    print("\n将删除以下文件:")
    for f in to_delete:
        print(f" - {f}")

    if dry_run:
        print("\n[DRY RUN] 未执行实际删除。")
        return

    try:
        confirm = input("\n⚠️ 确认删除以上文件?(y/N): ").strip().lower()
    except EOFError:
        # No interactive stdin (pipe, cron, ...): treat as "no" rather
        # than crashing with a traceback after printing the list.
        confirm = ""

    if confirm != 'y':
        print("操作已取消。")
        return

    for f in to_delete:
        # Best effort: one failed deletion must not stop the others.
        try:
            f.unlink()
        except OSError as e:
            print(f"❌ 删除失败: {f}, 错误: {e}")
        else:
            print(f"✅ 已删除: {f}")
    print("清理完成。")
if __name__ == "__main__":
    # Command-line entry point: two positional directories plus an optional
    # --dry-run switch that previews the deletions without performing them.
    cli = argparse.ArgumentParser(
        description="同步 images 和 labels 文件夹,删除不匹配的文件",
    )
    cli.add_argument("images_dir", help="图像文件夹路径")
    cli.add_argument("labels_dir", help="标签文件夹路径")
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="仅显示将要删除的文件,不实际删除",
    )
    options = cli.parse_args()
    sync_folders(
        options.images_dir,
        options.labels_dir,
        dry_run=options.dry_run,
    )
|