import os
import argparse
from pathlib import Path

# Supported image extensions, compared against the lower-cased suffix
# (adjust as needed).
IMG_EXTENSIONS = frozenset({'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'})

# Label files are matched by this suffix only.
# NOTE(review): every unmatched ".txt" in the labels folder is treated as an
# orphan label, including files that are not per-image annotations (e.g. a
# class list) - confirm this is intended before running without --dry-run.
LABEL_EXTENSIONS = frozenset({'.txt'})


def _require_directory(path, kind):
    """Raise unless *path* exists and is a directory (*kind* labels the message)."""
    if not path.exists():
        raise FileNotFoundError(f"{kind} directory not found: {path}")
    if not path.is_dir():
        # Fail up front with a clear message instead of deep inside iterdir().
        raise NotADirectoryError(f"{kind} path is not a directory: {path}")


def _index_by_stem(directory, suffixes):
    """Group the regular files of *directory* whose suffix is in *suffixes* by stem.

    Returns a dict mapping stem -> list of paths. A list is used because
    several files can share one stem (``a.jpg`` and ``a.png``); keeping only
    one of them would hide the others from the orphan report.
    """
    index = {}
    for entry in directory.iterdir():
        if entry.is_file() and entry.suffix.lower() in suffixes:
            index.setdefault(entry.stem, []).append(entry)
    return index


def _paths_for(index, stems):
    """Return every path stored in *index* under *stems*, in sorted order."""
    selected = []
    for stem in sorted(stems):
        selected.extend(sorted(index[stem]))
    return selected


def sync_folders(images_dir, labels_dir, dry_run=False):
    """Delete images without a label file and label files without an image.

    Files are paired by stem (file name without extension). Only the top
    level of each directory is scanned. Images are files whose lower-cased
    suffix is in ``IMG_EXTENSIONS``; labels are files ending in ``.txt``.

    Args:
        images_dir: Path (str or Path) of the image folder.
        labels_dir: Path (str or Path) of the label folder.
        dry_run: When true, only print the files that would be deleted.

    Returns:
        None. Progress and results are printed to stdout.

    Raises:
        FileNotFoundError: If either directory does not exist.
        NotADirectoryError: If either path exists but is not a directory.
    """
    images_dir = Path(images_dir)
    labels_dir = Path(labels_dir)
    _require_directory(images_dir, "Images")
    _require_directory(labels_dir, "Labels")

    image_files = _index_by_stem(images_dir, IMG_EXTENSIONS)
    label_files = _index_by_stem(labels_dir, LABEL_EXTENSIONS)

    # Stems present on one side only; every file under such a stem is an orphan.
    orphan_images = _paths_for(image_files, image_files.keys() - label_files.keys())
    orphan_labels = _paths_for(label_files, label_files.keys() - image_files.keys())

    # Count files rather than stems so same-stem duplicates are not hidden.
    image_count = sum(len(paths) for paths in image_files.values())
    label_count = sum(len(paths) for paths in label_files.values())

    print(f"总图像数: {image_count}")
    print(f"总标签数: {label_count}")
    print(f"仅有图像(无对应标签): {len(orphan_images)} 个")
    print(f"仅有标签(无对应图像): {len(orphan_labels)} 个")

    if not orphan_images and not orphan_labels:
        print("✅ 所有图像和标签已对齐!")
        return

    # Default policy: delete both image-only and label-only files.
    to_delete = orphan_images + orphan_labels

    print("\n将删除以下文件:")
    for path in to_delete:
        print(f"  - {path}")

    if dry_run:
        print("\n[DRY RUN] 未执行实际删除。")
        return

    try:
        confirm = input("\n⚠️ 确认删除以上文件?(y/N): ").strip().lower()
    except EOFError:
        # stdin is closed or exhausted: fall back to the prompt's default "N".
        confirm = ""

    if confirm != 'y':
        print("操作已取消。")
        return

    for path in to_delete:
        try:
            path.unlink()
        except OSError as e:
            # Best effort: report the failure and continue with the rest.
            print(f"❌ 删除失败: {path}, 错误: {e}")
        else:
            print(f"✅ 已删除: {path}")
    print("清理完成。")


def main():
    """Parse the command line and run ``sync_folders``."""
    parser = argparse.ArgumentParser(description="同步 images 和 labels 文件夹,删除不匹配的文件")
    parser.add_argument("images_dir", help="图像文件夹路径")
    parser.add_argument("labels_dir", help="标签文件夹路径")
    parser.add_argument("--dry-run", action="store_true", help="仅显示将要删除的文件,不实际删除")
    args = parser.parse_args()
    sync_folders(args.images_dir, args.labels_dir, dry_run=args.dry_run)


if __name__ == "__main__":
    main()