#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
JSON标签到YOLO格式转换脚本
支持多种常见的JSON标注格式转换为YOLO格式

功能特性:
- 支持LabelMe、COCO、YOLO等多种JSON格式
- 矩形标注转换为YOLO边界框格式 (class_id x_center y_center width height)
- 多边形标注保留所有点位信息 (class_id x1 y1 x2 y2 ... xn yn)
- 自动归一化坐标到[0,1]范围
- 支持自定义类别映射文件
"""

import os
import json
import glob
from pathlib import Path
import argparse

def convert_bbox_to_yolo(bbox, img_width, img_height, format_type="xywh"):
    """
    Convert a pixel-space bounding box into normalized YOLO form.

    Args:
        bbox: Four numbers describing the box; their meaning depends on
            ``format_type``.
        img_width: Image width, used to normalize horizontal values.
        img_height: Image height, used to normalize vertical values.
        format_type: ``"xyxy"`` for [x_min, y_min, x_max, y_max];
            ``"xywh"`` or ``"coco"`` for [x, y, width, height] where (x, y)
            is the top-left corner.

    Returns:
        tuple: (center_x, center_y, width, height), each divided by the
        matching image dimension.

    Raises:
        ValueError: If ``format_type`` is not one of the supported names.
    """
    if format_type not in ("xyxy", "xywh", "coco"):
        raise ValueError(f"不支持的格式类型: {format_type}")

    if format_type == "xyxy":
        # Corner form: derive the extent from the two corners.
        left, top, right, bottom = bbox
        box_w = right - left
        box_h = bottom - top
    else:
        # "xywh" and "coco" share the same top-left + extent layout.
        left, top, box_w, box_h = bbox

    # Center is the top-left corner shifted by half the extent; everything is
    # then scaled by the image size.
    return (
        (left + box_w / 2) / img_width,
        (top + box_h / 2) / img_height,
        box_w / img_width,
        box_h / img_height,
    )

def convert_polygon_to_yolo(points, img_width, img_height):
    """
    Normalize polygon vertices and flatten them into one coordinate list.

    Args:
        points: Sequence of (x, y) pairs, e.g. [[x1, y1], [x2, y2], ...].
        img_width: Image width, used to normalize x values.
        img_height: Image height, used to normalize y values.

    Returns:
        list: [x1_norm, y1_norm, x2_norm, y2_norm, ...] in vertex order.
    """
    # Each vertex contributes its normalized x followed by its normalized y.
    return [
        value
        for x, y in points
        for value in (x / img_width, y / img_height)
    ]

def parse_labelme_json(json_data):
    """
    Parse a LabelMe-style annotation dict.

    Only two shape kinds are extracted: rectangles given by exactly two
    corner points, and polygons with at least three points. Every other
    shape is skipped.

    Args:
        json_data: Decoded JSON object with ``imageWidth``, ``imageHeight``
            and ``shapes`` keys.

    Returns:
        list: 5-tuples ``(label, data, data_format, img_width, img_height)``.
        For rectangles ``data`` is ``[x_min, y_min, x_max, y_max]`` and
        ``data_format`` is ``"xyxy"``; for polygons ``data`` is the original
        point list and ``data_format`` is ``"polygon"``.

    Raises:
        ValueError: If the image width or height is missing, null or zero.
    """
    img_width = json_data.get('imageWidth')
    img_height = json_data.get('imageHeight')

    # A JSON null would pass an "== 0" comparison and only fail much later as
    # a TypeError during normalization, so reject every falsy size here.
    if not img_width or not img_height:
        raise ValueError("JSON文件中缺少图片尺寸信息")

    annotations = []
    # "shapes" / "points" may be present but null; treat that as empty.
    for shape in json_data.get('shapes') or []:
        label = shape.get('label', '')
        shape_type = shape.get('shape_type', 'rectangle')
        points = shape.get('points') or []

        if shape_type == 'rectangle' and len(points) == 2:
            # Rectangle: [[x1, y1], [x2, y2]]; the corners may come in any
            # order, so sort them into min/max form.
            (x1, y1), (x2, y2) = points
            bbox = [min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)]
            annotations.append((label, bbox, "xyxy", img_width, img_height))
        elif shape_type == 'polygon' and len(points) >= 3:
            # Polygon: keep every vertex untouched.
            annotations.append((label, points, "polygon", img_width, img_height))

    return annotations

def parse_coco_json(json_data):
    """
    Group the bounding-box annotations of a COCO-style dict by image.

    Args:
        json_data: Decoded JSON object with ``categories``, ``images`` and
            ``annotations`` lists.

    Returns:
        dict: Maps each annotated image id to a list of 6-tuples
        ``(class_name, bbox, "coco", img_width, img_height, file_name)``.
        An image id that appears in an annotation but not in ``images``
        still gets an entry, with an empty list.
    """
    # Lookup tables keyed by the ids the annotations refer to.
    name_by_category = {entry['id']: entry['name'] for entry in json_data.get('categories', [])}
    image_by_id = {entry['id']: entry for entry in json_data.get('images', [])}

    grouped = {}
    for ann in json_data.get('annotations', []):
        image_id = ann['image_id']
        category_id = ann['category_id']
        bbox = ann['bbox']  # COCO layout: [x, y, width, height]

        # The bucket is created even when the image record is unknown.
        bucket = grouped.setdefault(image_id, [])

        if image_id not in image_by_id:
            continue

        img_info = image_by_id[image_id]
        img_width = img_info['width']
        img_height = img_info['height']
        # Categories without a declared name fall back to "class_<id>".
        class_name = name_by_category.get(category_id, f'class_{category_id}')

        bucket.append(
            (class_name, bbox, "coco", img_width, img_height, img_info['file_name'])
        )

    return grouped

def parse_yolo_json(json_data):
    """
    Parse the custom "YOLO JSON" layout.

    The image size is read from ``image_width``/``image_height`` (falling
    back to ``width``/``height``), and objects from ``objects`` (falling
    back to ``annotations``). Objects whose box cannot be located are
    reported with a warning and skipped.

    Args:
        json_data: Decoded JSON object.

    Returns:
        list: 5-tuples ``(class_name, bbox, bbox_format, img_width,
        img_height)``.

    Raises:
        ValueError: If the image width or height is missing or zero.
    """
    img_width = json_data.get('image_width', json_data.get('width', 0))
    img_height = json_data.get('image_height', json_data.get('height', 0))

    if img_width == 0 or img_height == 0:
        raise ValueError("JSON文件中缺少图片尺寸信息")

    # Flat per-object key sets that spell out a box without a "bbox" list.
    corner_extent_keys = ('x', 'y', 'width', 'height')
    corner_pair_keys = ('x_min', 'y_min', 'x_max', 'y_max')

    parsed = []
    for obj in json_data.get('objects', json_data.get('annotations', [])):
        class_name = obj.get('class', obj.get('category', obj.get('label', '')))

        # Box sources, in priority order.
        if 'bbox' in obj or 'bounding_box' in obj:
            bbox = obj['bbox'] if 'bbox' in obj else obj['bounding_box']
            bbox_format = obj.get('bbox_format', 'xywh')
        elif all(key in obj for key in corner_extent_keys):
            bbox = [obj[key] for key in corner_extent_keys]
            bbox_format = 'xywh'
        elif all(key in obj for key in corner_pair_keys):
            bbox = [obj[key] for key in corner_pair_keys]
            bbox_format = 'xyxy'
        else:
            print(f"警告: 无法解析对象的边界框格式: {obj}")
            continue

        parsed.append((class_name, bbox, bbox_format, img_width, img_height))

    return parsed

def _resolve_class_id(class_name, class_mapping):
    """Return the mapped ID for ``class_name``, or 0 when no mapping applies."""
    if class_mapping and class_name in class_mapping:
        return class_mapping[class_name]
    return 0  # default class ID


def _format_yolo_line(class_id, data, data_format, img_width, img_height):
    """Render one annotation as a YOLO label line (without the newline)."""
    if data_format == "polygon":
        # Polygon: class id followed by every normalized vertex coordinate.
        values = convert_polygon_to_yolo(data, img_width, img_height)
    else:
        # Box: class id followed by center_x, center_y, width, height.
        values = convert_bbox_to_yolo(data, img_width, img_height, data_format)
    return ' '.join([str(class_id), *(f"{value:.6f}" for value in values)])


def _write_label_file(output_file, annotations, class_mapping):
    """Convert ``annotations`` and write them to ``output_file``, one per line."""
    # Build every line before opening the file so that a conversion error
    # cannot leave a truncated label file behind.
    lines = []
    # COCO tuples carry a trailing file name; "*_" absorbs it.
    for class_name, data, data_format, img_width, img_height, *_ in annotations:
        class_id = _resolve_class_id(class_name, class_mapping)
        line = _format_yolo_line(class_id, data, data_format, img_width, img_height)
        lines.append(line + "\n")

    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    print(f"已生成: {output_file}")


def convert_json_to_yolo(json_file_path, output_dir, class_mapping=None, json_format="auto"):
    """
    Convert one JSON annotation file into YOLO label file(s).

    LabelMe and custom-YOLO inputs produce a single ``<json stem>.txt``.
    COCO inputs produce one ``<image stem>.txt`` per annotated image.
    Class names absent from ``class_mapping`` (or all names when no mapping
    is given) are written with class ID 0.

    Args:
        json_file_path: Path of the JSON file to convert.
        output_dir: Directory receiving the ``.txt`` files; created if it
            does not exist.
        class_mapping: Optional dict mapping class name to class ID.
        json_format: ``"auto"``, ``"labelme"``, ``"coco"`` or ``"yolo"``.
            With ``"auto"`` the format is guessed from the top-level keys.

    Raises:
        ValueError: If ``json_format`` is not a supported name, or the
            parsers reject the data (e.g. missing image size).
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        json_data = json.load(f)

    # Auto-detect the layout from its characteristic top-level keys.
    if json_format == "auto":
        if 'shapes' in json_data and 'imageWidth' in json_data:
            json_format = "labelme"
        elif 'categories' in json_data and 'annotations' in json_data and 'images' in json_data:
            json_format = "coco"
        else:
            json_format = "yolo"

    # An unknown format used to fall through every branch and report
    # success without writing anything; fail loudly instead.
    if json_format not in ("labelme", "coco", "yolo"):
        raise ValueError(f"不支持的JSON格式类型: {json_format}")

    print(f"检测到JSON格式: {json_format}")

    # Allow use as a library function without pre-creating the directory.
    os.makedirs(output_dir, exist_ok=True)

    if json_format == "coco":
        for annotations in parse_coco_json(json_data).values():
            if not annotations:
                continue
            # Every tuple of one image carries the same file name; take the first.
            file_name = annotations[0][5]
            output_file = os.path.join(output_dir, f"{Path(file_name).stem}.txt")
            _write_label_file(output_file, annotations, class_mapping)
        return

    if json_format == "labelme":
        annotations = parse_labelme_json(json_data)
    else:
        annotations = parse_yolo_json(json_data)

    # Single-image formats: the label file is named after the JSON file.
    output_file = os.path.join(output_dir, f"{Path(json_file_path).stem}.txt")
    _write_label_file(output_file, annotations, class_mapping)

def load_class_mapping(mapping_file):
    """
    Load a class-name -> class-ID mapping from a file.

    A path ending in ``.json`` is decoded as JSON and returned as-is. Any
    other file is read as text, one entry per line, where ``#`` starts a
    comment and blank lines are ignored. Accepted line layouts:

    1. ``name``            - the zero-based line index becomes the ID
    2. ``ID name`` or ``ID,name`` - explicit leading ID
    3. ``name ID``         - explicit trailing ID

    Names may contain spaces. The implicit ID of layout 1 is the index of
    the line in the file, so skipped blank/comment lines still count.

    Args:
        mapping_file: Path of the mapping file.

    Returns:
        dict: Class name to ID mapping, or None if the file does not exist.
    """
    if not os.path.exists(mapping_file):
        return None

    if mapping_file.endswith('.json'):
        with open(mapping_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    mapping = {}
    with open(mapping_file, 'r', encoding='utf-8') as f:
        for line_index, raw in enumerate(f):
            # Drop the inline comment (if any) and surrounding whitespace.
            text = raw.partition('#')[0].strip()
            if not text:
                continue

            entry = None  # (name, class_id) once a layout matched

            # Comma layout, e.g. "0,fire": only accepted as exactly "ID,name".
            if ',' in text:
                fields = [piece.strip() for piece in text.split(',')]
                fields = [piece for piece in fields if piece]
                if len(fields) == 2 and fields[0].isdigit():
                    entry = (fields[1], int(fields[0]))

            # Whitespace layouts: "0 fire", "fire 0" or just "fire".
            if entry is None:
                words = text.split()
                several = len(words) >= 2
                if several and words[0].isdigit():
                    # "ID name (may contain spaces)"
                    entry = (' '.join(words[1:]), int(words[0]))
                elif several and words[-1].isdigit():
                    # "name (may contain spaces) ID"
                    entry = (' '.join(words[:-1]), int(words[-1]))
                else:
                    # No explicit ID: whole line is the name, line index is the ID.
                    entry = (' '.join(words), line_index)

            name, class_id = entry
            mapping[name] = class_id

    return mapping

def main():
    """
    Command-line entry point.

    Parses the arguments, optionally loads a class mapping, collects the
    JSON file(s) named by ``input_path`` (a single file, or every ``*.json``
    directly inside a directory) and converts each one. With ``--test`` the
    files are only parsed and summarized; nothing is written. A failure on
    one file is reported and counted, and processing continues with the
    next file.
    """
    parser = argparse.ArgumentParser(description='JSON标签到YOLO格式转换工具')
    parser.add_argument('input_path', help='输入JSON文件或包含JSON文件的目录')
    # "%(default)s" keeps the help text in sync with the real default; the
    # previous hard-coded text advertised a different directory.
    parser.add_argument('-o', '--output', default='./20251124/yolo_labels',
                        help='输出目录 (默认: %(default)s)')
    parser.add_argument('-c', '--classes', help='类别映射文件 (txt或json格式)')
    parser.add_argument('-f', '--format', choices=['auto', 'labelme', 'coco', 'yolo'],
                        default='auto', help='JSON格式类型 (默认: %(default)s)')
    parser.add_argument('--test', action='store_true', help='测试模式，仅显示解析结果不生成文件')

    args = parser.parse_args()

    # Create the output directory (not needed in test mode, which writes nothing).
    output_dir = args.output
    if not args.test:
        os.makedirs(output_dir, exist_ok=True)

    # Load the class mapping, if one was requested.
    class_mapping = None
    if args.classes:
        class_mapping = load_class_mapping(args.classes)
        if class_mapping:
            print(f"已加载类别映射: {class_mapping}")
        else:
            print(f"警告: 无法加载类别映射文件: {args.classes}")

    # Resolve the input path into a list of JSON files.
    input_path = args.input_path

    if os.path.isfile(input_path):
        json_files = [input_path]
    elif os.path.isdir(input_path):
        # glob order is filesystem-dependent; sort for reproducible runs.
        json_files = sorted(glob.glob(os.path.join(input_path, "*.json")))
    else:
        print(f"错误: 输入路径不存在: {input_path}")
        return

    if not json_files:
        print(f"错误: 在 {input_path} 中没有找到JSON文件")
        return

    print(f"找到 {len(json_files)} 个JSON文件")

    success_count = 0
    error_count = 0

    for json_file in json_files:
        # Top-level boundary: one bad file must not abort the whole batch.
        try:
            print(f"\n处理文件: {json_file}")

            if args.test:
                # Test mode: parse and describe the file only.
                with open(json_file, 'r', encoding='utf-8') as f:
                    json_data = json.load(f)

                print(f"  JSON键: {list(json_data.keys())}")

                if 'shapes' in json_data:
                    print(f"  LabelMe格式，包含 {len(json_data['shapes'])} 个标注")
                elif 'annotations' in json_data:
                    print(f"  COCO格式，包含 {len(json_data['annotations'])} 个标注")
                else:
                    print("  自定义格式")
            else:
                convert_json_to_yolo(json_file, output_dir, class_mapping, args.format)

            success_count += 1

        except Exception as e:
            print(f"  错误: {e}")
            error_count += 1

    print("\n转换完成:")
    print(f"  成功: {success_count} 个文件")
    print(f"  失败: {error_count} 个文件")

    if not args.test and success_count > 0:
        print(f"  输出目录: {output_dir}")

if __name__ == "__main__":
    # When launched without any command-line arguments, ask for the options
    # interactively and hand them to main() through a synthesized sys.argv.
    import sys

    if len(sys.argv) == 1:
        print("JSON标签到YOLO格式转换工具")
        print("=" * 50)

        # The input path is mandatory; abort right away when it is left empty.
        input_path = input("请输入JSON文件或目录路径: ").strip()
        if not input_path:
            print("错误: 必须提供输入路径")
            sys.exit(1)

        # Empty answers fall back to the defaults shown in the prompts.
        output_dir = input("请输入输出目录 (默认: ./yolo_labels): ").strip() or "./yolo_labels"
        classes_file = input("请输入类别映射文件路径 (可选): ").strip()
        json_format = input("请输入JSON格式 (auto/labelme/coco/yolo, 默认: auto): ").strip() or "auto"
        test_mode = input("是否启用测试模式？(y/N): ").strip().lower() == 'y'

        # Build the equivalent command line for argparse.
        simulated_argv = ['json_to_yolo.py', input_path, '-o', output_dir, '-f', json_format]
        if classes_file:
            simulated_argv += ['-c', classes_file]
        if test_mode:
            simulated_argv.append('--test')
        sys.argv = simulated_argv

    main()