# Batch-rename paired image/label files under <root>/images/<subset> and <root>/labels/<subset> to sequential numbers.
- import os
- import glob
- import uuid
def batch_rename(dataset_root, sub_sets=('train', 'val', 'test'),
                 valid_extensions=('.jpg', '.jpeg', '.png', '.bmp')):
    """Rename every image/label pair of a dataset to sequential numbers.

    For each subset, images in ``<dataset_root>/images/<subset>`` that have a
    same-stem ``.txt`` file in ``<dataset_root>/labels/<subset>`` are renamed
    to ``00001<ext>``, ``00002<ext>``, ... (in sorted file-name order) and the
    label files to the matching ``00001.txt``, ``00002.txt``, ...

    Images without a label are left untouched. When several images share one
    stem (e.g. ``a.jpg`` and ``a.png``) only the first one in sorted order is
    paired with the label; the others are left untouched. A subset is skipped
    without touching any file when a target name is already taken by a file
    that is not itself being renamed, so nothing is ever overwritten.

    Args:
        dataset_root: Directory containing the ``images`` and ``labels`` folders.
        sub_sets: Names of the sub-folders to process.
        valid_extensions: Image file extensions (with leading dot) to consider;
            matching is case-insensitive.

    Returns:
        A dict mapping each subset name to the number of pairs renamed
        (0 for subsets that were skipped).

    Raises:
        OSError: If a rename fails. Renames already performed for the current
            subset are rolled back before the error is re-raised.
    """
    extensions = {ext.lower() for ext in valid_extensions}
    renamed = {}

    for subset in sub_sets:
        renamed[subset] = 0
        img_dir = os.path.join(dataset_root, 'images', subset)
        label_dir = os.path.join(dataset_root, 'labels', subset)

        # Both folders must be real directories, not merely existing paths.
        if not (os.path.isdir(img_dir) and os.path.isdir(label_dir)):
            print(f"Skipping {subset}: directories not found.")
            continue

        print(f"Processing: {subset} ...")

        # Sorting keeps the numbering deterministic across runs.
        image_files = sorted(
            name for name in os.listdir(img_dir)
            if os.path.splitext(name)[1].lower() in extensions
        )
        if not image_files:
            print(f" No images found in {subset}")
            continue

        rename_pairs = _collect_pairs(img_dir, label_dir, image_files)
        print(f" Found {len(rename_pairs)} pairs. Starting rename...")

        collisions = _find_collisions(rename_pairs)
        if collisions:
            for path in collisions:
                print(f" Error: target name already in use by another file: {path}")
            print(f" Skipping {subset}: nothing was renamed.")
            continue

        _rename_with_rollback(rename_pairs)
        renamed[subset] = len(rename_pairs)
        print(f" {subset} done! Processed {len(rename_pairs)} pairs.")

    return renamed


def _collect_pairs(img_dir, label_dir, image_files):
    """Build the rename plan: one dict of source/target paths per labelled image."""
    pairs = []
    claimed = {}  # normalised label path -> image that already uses it
    for img_name in image_files:
        stem, ext = os.path.splitext(img_name)
        src_txt = os.path.join(label_dir, stem + ".txt")  # labels are assumed to be .txt

        if not os.path.exists(src_txt):
            print(f" Warning: No label found for {img_name}, skipping.")
            continue

        # A label can belong to one image only; otherwise the second rename of
        # the same label file would fail half-way through the batch.
        label_key = os.path.normcase(src_txt)
        if label_key in claimed:
            print(f" Warning: {img_name} shares its label with "
                  f"{claimed[label_key]}, skipping.")
            continue
        claimed[label_key] = img_name

        new_stem = f"{len(pairs) + 1:05d}"  # e.g. 00001
        pairs.append({
            'src_img': os.path.join(img_dir, img_name),
            'src_txt': src_txt,
            'dst_img': os.path.join(img_dir, new_stem + ext),
            'dst_txt': os.path.join(label_dir, new_stem + ".txt"),
        })
    return pairs


def _find_collisions(pairs):
    """Return target paths occupied by files that are not part of the rename plan."""
    keys = ('src_img', 'src_txt')
    sources = {os.path.normcase(item[key]) for item in pairs for key in keys}
    return [
        item[key]
        for item in pairs
        for key in ('dst_img', 'dst_txt')
        # A target that is also a source is fine: it is moved away in phase one.
        if os.path.normcase(item[key]) not in sources and os.path.lexists(item[key])
    ]


def _rename_with_rollback(pairs):
    """Rename sources to temp names, then temp names to targets; undo all on failure."""
    to_temp = []
    to_final = []
    for item in pairs:
        # Going through a unique temporary name avoids clobbering a file that
        # still waits for its own rename (e.g. 2.jpg -> 1.jpg while 1.jpg exists).
        token = str(uuid.uuid4())
        item['temp_img'] = item['src_img'] + f".{token}.tmp"
        item['temp_txt'] = item['src_txt'] + f".{token}.tmp"
        to_temp.append((item['src_img'], item['temp_img']))
        to_temp.append((item['src_txt'], item['temp_txt']))
        to_final.append((item['temp_img'], item['dst_img']))
        to_final.append((item['temp_txt'], item['dst_txt']))

    done = []
    try:
        for src, dst in to_temp + to_final:
            os.rename(src, dst)
            done.append((src, dst))
    except OSError:
        # Undo in exact reverse order so every file gets its original name back.
        for src, dst in reversed(done):
            try:
                os.rename(dst, src)
            except OSError as undo_error:
                print(f" Error: could not restore {src} from {dst}: {undo_error}")
        raise
if __name__ == "__main__":
    # Point dataset_path at the dataset root directory.
    # Expected layout:
    #   my_dataset/
    #   ├── images/
    #   │   ├── train/
    #   │   ├── val/
    #   │   └── test/
    #   └── labels/
    #       ├── train/
    #       ├── val/
    #       └── test/

    # Change this! Avoid non-ASCII characters in the path to prevent errors.
    dataset_path = r"20251210"

    # Renaming happens in place, so ask for explicit confirmation first.
    prompt = f"Target path is: {dataset_path}\nHave you backed up your data? (y/n): "
    answer = input(prompt)
    if answer.lower() != 'y':
        print("Operation cancelled.")
    else:
        batch_rename(dataset_path)
        print("\nAll operations completed.")
|