lstlm
/
lcnn


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
							import math
import os
import sys
from collections import OrderedDict
from datetime import datetime
from typing import Mapping
import cv2
import numpy as np
import torch
import torchvision
from PIL import Image
from matplotlib import pyplot as plt
from torch import nn
from torch.nn.modules.module import T
from torchvision.io import read_image
from torchvision.models import resnet50, ResNet50_Weights, resnet18, ResNet18_Weights
from torchvision.models._utils import _ovewrite_value_param
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
from torchvision.models.detection.anchor_utils import AnchorGenerator
from torchvision.models.detection.backbone_utils import _validate_trainable_layers, _resnet_fpn_extractor
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor, KeypointRCNN, \
    KeypointRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.ops.feature_pyramid_network import LastLevelMaxPool
from torchvision.utils import draw_bounding_boxes
from torchvision.ops import misc as misc_nn_ops, FeaturePyramidNetwork
from typing import Optional, Any
from models.config.config_tool import read_yaml
from models.keypoint.trainer import train_cfg
from models.wirenet._utils import overwrite_eps
# from timm import create_model
from  torchvision.models._meta import _COCO_PERSON_CATEGORIES, _COCO_PERSON_KEYPOINT_NAMES
from tools import utils
# Sets CUDA_LAUNCH_BLOCKING=1 in this process's environment as soon as the
# module is imported.
# NOTE(review): presumably a debugging aid so CUDA errors are reported at the
# failing call; it may slow down GPU execution — confirm it should stay enabled.
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

class KeypointRCNNModel(nn.Module):
    """Convenience wrapper around the detector built by ``keypointrcnn_resnet18_fpn``.

    The wrapped detector is stored in a private attribute. On top of it this
    class offers config-driven training (``train``), checkpoint loading
    (``load_weight`` / ``load_state_dict``) and a single-image ``predict``
    helper with optional matplotlib visualisation.
    """

    def __init__(self, num_classes=2, num_keypoints=2, transforms=None):
        """Build the wrapped detector.

        Args:
            num_classes: Number of classes forwarded to the detector builder.
            num_keypoints: Number of keypoints forwarded to the detector builder.
            transforms: Callable applied to the input image in ``predict``.
                When ``None``, the preset transforms of
                ``KeypointRCNN_ResNet50_FPN_Weights.DEFAULT`` are used.
        """
        super().__init__()

        self.__model = keypointrcnn_resnet18_fpn(
            weights=None,
            num_classes=num_classes,
            num_keypoints=num_keypoints,
            progress=False,
        )

        # Always store a transform: previously a caller-supplied one was
        # dropped, which made ``predict`` fail with AttributeError.
        if transforms is None:
            transforms = torchvision.models.detection.KeypointRCNN_ResNet50_FPN_Weights.DEFAULT.transforms()
        self.transforms = transforms

        # Preferred device; the model itself is not moved here.
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    def forward(self, inputs):
        """Run the wrapped detector on ``inputs`` and return its output unchanged."""
        return self.__model(inputs)

    def train(self, cfg=True):
        """Train from a YAML config, or toggle training mode.

        Args:
            cfg: Path of a YAML file readable by ``read_yaml``; it must define
                ``num_keypoints``. The path is handed to ``train_cfg`` together
                with the wrapped detector. A ``bool`` is instead treated as the
                ``mode`` argument of ``nn.Module.train`` so that the standard
                ``module.train()`` / ``module.train(False)`` calls keep working.

        Returns:
            ``self`` when ``cfg`` is a bool, otherwise ``None``.
        """
        if isinstance(cfg, bool):
            return super().train(cfg)

        parameters = read_yaml(cfg)
        self.num_keypoints = parameters['num_keypoints']
        train_cfg(self.__model, cfg)

    def load_weight(self, pt_path):
        """Load a checkpoint file into the wrapped detector.

        Args:
            pt_path: Path of a file holding a state dict for the wrapped
                detector.
        """
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        # map_location='cpu' lets checkpoints saved on a GPU load on CPU-only
        # hosts; load_state_dict then copies onto the parameters' own device.
        state_dict = torch.load(pt_path, map_location='cpu')
        self.__model.load_state_dict(state_dict)

    def load_state_dict(self, state_dict: Mapping[str, Any], strict: bool = True):
        """Load ``state_dict`` into the wrapped detector.

        Args:
            state_dict: Parameters keyed as in the wrapped detector's own
                state dict (not prefixed with this wrapper's attribute name).
            strict: Forwarded to the wrapped detector's ``load_state_dict``.

        Returns:
            Whatever the wrapped detector's ``load_state_dict`` returns
            (its missing/unexpected key report).
        """
        return self.__model.load_state_dict(state_dict, strict=strict)

    def eval(self: "T") -> "T":
        """Switch the wrapper and every sub-module to evaluation mode; return ``self``."""
        return super().train(False)

    def predict(self, img, show=True, save=False, save_path=None):
        """Run keypoint detection on a single image.

        Args:
            img (str or PIL.Image): Path of the input image, or a PIL image.
            show (bool): Display the visualisation. Defaults to True.
            save (bool): Save the visualisation to ``save_path``. Defaults to False.
            save_path: Destination file for the figure; required when ``save``
                is True.

        Returns:
            dict: The wrapped detector's prediction for the image (this method
            reads its ``'boxes'`` and ``'keypoints'`` entries).

        Raises:
            ValueError: If ``save`` is True but ``save_path`` is None.
        """
        # Fail before running inference rather than deep inside matplotlib.
        if save and save_path is None:
            raise ValueError("save_path must be provided when save=True")

        if isinstance(img, str):
            img = Image.open(img).convert("RGB")

        self.__model.eval()

        # Pre-process and place the tensor on the same device as the weights.
        model_device = next(self.__model.parameters()).device
        img_tensor = self.transforms(img).to(model_device)
        with torch.no_grad():
            predictions = self.__model([img_tensor])

        print(f'predictions:{predictions}')

        # Post-process: only one image was submitted, so take the first entry.
        result = predictions[0]
        boxes = result['boxes'].cpu().numpy()
        keypoints = result['keypoints'].cpu().numpy()

        if show or save:
            fig, ax = plt.subplots(figsize=(10, 10))
            ax.imshow(np.array(img))

            for x0, y0, x1, y1 in boxes:
                ax.add_patch(plt.Rectangle((x0, y0), x1 - x0, y1 - y0, fill=False, edgecolor='yellow', linewidth=1))

            # Connect the keypoints of each instance in order; this works for
            # any number of keypoints (the two-keypoint case draws one segment).
            for instance_keypoints in keypoints:
                xs = instance_keypoints[:, 0]
                ys = instance_keypoints[:, 1]
                ax.plot(xs, ys, c='red', linewidth=1)
                ax.scatter(xs, ys, c='red', s=2)

            if show:
                plt.show()

            if save:
                fig.savefig(save_path)
                print(f"Prediction saved to {save_path}")
            plt.close(fig)

        return result

def keypointrcnn_resnet18_fpn(
        *,
        weights: Optional[KeypointRCNN_ResNet50_FPN_Weights] = None,
        progress: bool = True,
        num_classes: Optional[int] = None,
        num_keypoints: Optional[int] = None,
        weights_backbone: Optional[Any] = ResNet50_Weights.IMAGENET1K_V1,
        trainable_backbone_layers: Optional[int] = None,
        **kwargs: Any,
) -> KeypointRCNN:
    """
    Constructs a Keypoint R-CNN model with a ResNet-18-FPN backbone.

    Adapted from torchvision's ``keypointrcnn_resnet50_fpn``; the backbone is
    built with ``resnet18`` instead of ``resnet50``.

    The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
    image, and should be in ``0-1`` range. Different images can have different sizes.

    The behavior of the model changes depending on if it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (list of dictionary),
    containing:

        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box
        - keypoints (``FloatTensor[N, K, 3]``): the ``K`` keypoints location for each of the ``N`` instances, in the
          format ``[x, y, visibility]``, where ``visibility=0`` means that the keypoint is not visible.

    The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
    losses for both the RPN and the R-CNN, and the keypoint loss.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
    follows, where ``N`` is the number of detected instances:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): the predicted labels for each instance
        - scores (``Tensor[N]``): the scores of each instance
        - keypoints (``FloatTensor[N, K, 3]``): the locations of the predicted keypoints, in ``[x, y, v]`` format.

    Example::

        >>> model = keypointrcnn_resnet18_fpn(num_classes=2, num_keypoints=2)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        weights (:class:`~torchvision.models.detection.KeypointRCNN_ResNet50_FPN_Weights`, optional): The
            pretrained detector weights to load. By default, no pre-trained detector weights are used.
            NOTE(review): these checkpoints are published for the ResNet-50-FPN architecture, so loading
            them into this ResNet-18 model is expected to fail in ``load_state_dict`` — confirm before
            passing anything other than ``None``.
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int, optional): number of output classes of the model (including the background).
            Defaults to 2 when ``weights`` is ``None``.
        num_keypoints (int, optional): number of keypoints. Defaults to 17 when ``weights`` is ``None``.
        weights_backbone (:class:`~torchvision.models.ResNet18_Weights`, optional): The pretrained
            weights for the ResNet-18 backbone. For backward compatibility any other accepted value
            (``None``, or a :class:`~torchvision.models.ResNet50_Weights` member / member name, which is
            also the signature default) is validated as before and then replaced by
            ``ResNet18_Weights.IMAGENET1K_V1``.
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 3.
        **kwargs: forwarded to the ``KeypointRCNN`` constructor.

    Returns:
        KeypointRCNN: the assembled detector.
    """
    weights = KeypointRCNN_ResNet50_FPN_Weights.verify(weights)

    if not isinstance(weights_backbone, ResNet18_Weights):
        # Legacy path: keep the historical validation (which rejects unknown
        # values) and then use the ImageNet ResNet-18 weights, exactly as the
        # previous implementation did for every accepted input.
        ResNet50_Weights.verify(weights_backbone)
        weights_backbone = ResNet18_Weights.IMAGENET1K_V1

    if weights is not None:
        num_classes = _ovewrite_value_param("num_classes", num_classes, len(weights.meta["categories"]))
        num_keypoints = _ovewrite_value_param("num_keypoints", num_keypoints, len(weights.meta["keypoint_names"]))
    else:
        if num_classes is None:
            num_classes = 2
        if num_keypoints is None:
            num_keypoints = 17

    # weights_backbone is never None here, so the backbone always counts as
    # pretrained and frozen batch-norm layers are used.
    is_trained = weights is not None or weights_backbone is not None
    trainable_backbone_layers = _validate_trainable_layers(is_trained, trainable_backbone_layers, 5, 3)
    norm_layer = misc_nn_ops.FrozenBatchNorm2d if is_trained else nn.BatchNorm2d

    backbone = resnet18(weights=weights_backbone, progress=progress, norm_layer=norm_layer)
    backbone = _resnet_fpn_extractor(backbone, trainable_backbone_layers)
    model = KeypointRCNN(backbone, num_classes, num_keypoints=num_keypoints, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
        if weights == KeypointRCNN_ResNet50_FPN_Weights.COCO_V1:
            overwrite_eps(model, 0.0)

    return model
if __name__ == '__main__':
    # Script entry point: build a two-keypoint detector and train it with the
    # settings found in train.yaml.
    wts_path = './train_results/20241227_231659/weights/best.pt'
    keypoint_model = KeypointRCNNModel(num_keypoints=2)

    keypoint_model.train(cfg='train.yaml')

    # To run inference instead of training, load the checkpoint above and
    # predict on an image:
    # keypoint_model.load_weight(wts_path)
    # img_path = r"F:\DevTools\datasets\renyaun\1012\images\2024-09-23-10-02-15_SaveImage.png"
    # keypoint_model.predict(img_path)