admin 1 month ago
parent
commit
f214582572
45 changed files with 374 additions and 185 deletions
  1. + 6 - 6    lcnn/models/detection/ROI_heads.py
  2. + 9 - 9    libs/vision_libs/datasets/_optical_flow.py
  3. + 11 - 11  libs/vision_libs/datasets/_stereo_matching.py
  4. + 3 - 3    libs/vision_libs/models/_meta.py
  5. + 1 - 1    libs/vision_libs/models/detection/_utils.py
  6. + 1 - 1    libs/vision_libs/models/detection/fcos.py
  7. + 1 - 1    libs/vision_libs/models/detection/generalized_rcnn.py
  8. + 6 - 6    libs/vision_libs/models/detection/mask_rcnn.py
  9. + 1 - 1    libs/vision_libs/models/detection/retinanet.py
  10. + 6 - 6   libs/vision_libs/models/detection/roi_heads.py
  11. + 2 - 2   libs/vision_libs/models/optical_flow/raft.py
  12. + 1 - 1   libs/vision_libs/models/swin_transformer.py
  13. + 2 - 2   libs/vision_libs/models/video/swin_transformer.py
  14. + 5 - 5   libs/vision_libs/ops/deform_conv.py
  15. + 1 - 1   libs/vision_libs/ops/roi_align.py
  16. + 1 - 1   libs/vision_libs/transforms/_functional_tensor.py
  17. + 1 - 1   libs/vision_libs/transforms/v2/_utils.py
  18. + 2 - 2   libs/vision_libs/transforms/v2/functional/_geometry.py
  19. + 1 - 1   libs/vision_libs/tv_tensors/_dataset_wrapper.py
  20. + 1 - 1   libs/vision_libs/tv_tensors/_mask.py
  21. + 2 - 2   libs/vision_libs/utils.py
  22. + 1 - 1   models/base/base_detection_net.py
  23. + 1 - 1   models/base/high_reso_swin.py
  24. + 3 - 3   models/dataset_tool.py
  25. + 2 - 2   models/ins/maskrcnn.py
  26. + 6 - 6   models/ins_detect/mask_rcnn.py
  27. + 2 - 2   models/ins_detect/maskrcnn.py
  28. + 18 - 18 models/ins_detect/test_datasets.py
  29. + 1 - 1   models/ins_detect/trainer.py
  30. + 12 - 12 models/line_detect/heads/head_losses.py
  31. + 0 - 0   models/line_detect/heads/ins/__init__.py
  32. + 15 - 0  models/line_detect/heads/ins/ins_forward.py
  33. + 121 - 0 models/line_detect/heads/ins/ins_losses.py
  34. + 51 - 0  models/line_detect/heads/ins/ins_predictor.py
  35. + 24 - 22 models/line_detect/line_detect.py
  36. + 18 - 18 models/line_detect/loi_heads.py
  37. + 6 - 6   models/line_net/roi_heads.py
  38. + 1 - 1   models/wirenet/_utils.py
  39. + 9 - 9   models/wirenet/head.py
  40. + 9 - 9   models/wirenet/roi_head.py
  41. + 1 - 1   models/wirenet2/WirePredictor.py
  42. + 1 - 1   models/wirenet2/_utils.py
  43. + 6 - 6   models/wirenet2/roi_heads.py
  44. + 1 - 1   tools/train.py
  45. + 1 - 1   tools/transforms.py

+ 6 - 6
lcnn/models/detection/ROI_heads.py

@@ -53,18 +53,18 @@ def maskrcnn_inference(x, labels):
     # type: (Tensor, List[Tensor]) -> List[Tensor]
     """
     From the results of the CNN, post process the masks
-    by taking the mask corresponding to the class with max
+    by taking the ins corresponding to the class with max
     probability (which are of fixed size and directly output
-    by the CNN) and return the masks in the mask field of the BoxList.
+    by the CNN) and return the masks in the ins field of the BoxList.
 
     Args:
-        x (Tensor): the mask logits
+        x (Tensor): the ins logits
         labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image
 
     Returns:
         results (list[BoxList]): one BoxList for each image, containing
-            the extra field mask
+            the extra field ins
     """
     mask_prob = x.sigmoid()
 
@@ -411,7 +411,7 @@ def paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
@@ -437,7 +437,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 

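For reference on the hunks above: `maskrcnn_inference` applies a sigmoid to the mask logits and then keeps, for each detection, only the channel of its predicted class. A minimal standalone sketch of that selection step (shapes here are illustrative, not taken from this repo):

```python
import torch

# Illustrative shapes: 5 detections, 3 classes, 28x28 mask logits.
mask_logits = torch.randn(5, 3, 28, 28)
labels = torch.tensor([2, 0, 1, 2, 0])  # predicted class per detection

mask_prob = mask_logits.sigmoid()
index = torch.arange(mask_prob.shape[0], device=mask_prob.device)
mask_prob = mask_prob[index, labels][:, None]  # keep predicted class: [5, 1, 28, 28]
print(mask_prob.shape)
```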
+ 9 - 9
libs/vision_libs/datasets/_optical_flow.py

@@ -70,7 +70,7 @@ class FlowDataset(ABC, VisionDataset):
             img1, img2, flow, valid_flow_mask = self.transforms(img1, img2, flow, valid_flow_mask)
 
         if self._has_builtin_flow_mask or valid_flow_mask is not None:
-            # The `or valid_flow_mask is not None` part is here because the mask can be generated within a transform
+            # The `or valid_flow_mask is not None` part is here because the ins can be generated within a transform
             return img1, img2, flow, valid_flow_mask
         else:
             return img1, img2, flow
@@ -120,7 +120,7 @@ class Sintel(FlowDataset):
         transforms (callable, optional): A function/transform that takes in
             ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
             ``valid_flow_mask`` is expected for consistency with other datasets which
-            return a built-in valid mask, such as :class:`~torchvision.datasets.KittiFlow`.
+            return a built-in valid ins, such as :class:`~torchvision.datasets.KittiFlow`.
     """
 
     def __init__(
@@ -160,7 +160,7 @@ class Sintel(FlowDataset):
             tuple: A 3-tuple with ``(img1, img2, flow)``.
             The flow is a numpy array of shape (2, H, W) and the images are PIL images.
             ``flow`` is None if ``split="test"``.
-            If a valid flow mask is generated within the ``transforms`` parameter,
+            If a valid flow ins is generated within the ``transforms`` parameter,
             a 4-tuple with ``(img1, img2, flow, valid_flow_mask)`` is returned.
         """
         return super().__getitem__(index)
@@ -219,7 +219,7 @@ class KittiFlow(FlowDataset):
 
         Returns:
             tuple: A 4-tuple with ``(img1, img2, flow, valid_flow_mask)``
-            where ``valid_flow_mask`` is a numpy boolean mask of shape (H, W)
+            where ``valid_flow_mask`` is a numpy boolean ins of shape (H, W)
             indicating which flow values are valid. The flow is a numpy array of
             shape (2, H, W) and the images are PIL images. ``flow`` and ``valid_flow_mask`` are None if
             ``split="test"``.
@@ -253,7 +253,7 @@ class FlyingChairs(FlowDataset):
         transforms (callable, optional): A function/transform that takes in
             ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
             ``valid_flow_mask`` is expected for consistency with other datasets which
-            return a built-in valid mask, such as :class:`~torchvision.datasets.KittiFlow`.
+            return a built-in valid ins, such as :class:`~torchvision.datasets.KittiFlow`.
     """
 
     def __init__(self, root: str, split: str = "train", transforms: Optional[Callable] = None) -> None:
@@ -289,7 +289,7 @@ class FlyingChairs(FlowDataset):
             tuple: A 3-tuple with ``(img1, img2, flow)``.
             The flow is a numpy array of shape (2, H, W) and the images are PIL images.
             ``flow`` is None if ``split="val"``.
-            If a valid flow mask is generated within the ``transforms`` parameter,
+            If a valid flow ins is generated within the ``transforms`` parameter,
             a 4-tuple with ``(img1, img2, flow, valid_flow_mask)`` is returned.
         """
         return super().__getitem__(index)
@@ -324,7 +324,7 @@ class FlyingThings3D(FlowDataset):
         transforms (callable, optional): A function/transform that takes in
             ``img1, img2, flow, valid_flow_mask`` and returns a transformed version.
             ``valid_flow_mask`` is expected for consistency with other datasets which
-            return a built-in valid mask, such as :class:`~torchvision.datasets.KittiFlow`.
+            return a built-in valid ins, such as :class:`~torchvision.datasets.KittiFlow`.
     """
 
     def __init__(
@@ -387,7 +387,7 @@ class FlyingThings3D(FlowDataset):
             tuple: A 3-tuple with ``(img1, img2, flow)``.
             The flow is a numpy array of shape (2, H, W) and the images are PIL images.
             ``flow`` is None if ``split="test"``.
-            If a valid flow mask is generated within the ``transforms`` parameter,
+            If a valid flow ins is generated within the ``transforms`` parameter,
             a 4-tuple with ``(img1, img2, flow, valid_flow_mask)`` is returned.
         """
         return super().__getitem__(index)
@@ -455,7 +455,7 @@ class HD1K(FlowDataset):
 
         Returns:
             tuple: A 4-tuple with ``(img1, img2, flow, valid_flow_mask)`` where ``valid_flow_mask``
-            is a numpy boolean mask of shape (H, W)
+            is a numpy boolean ins of shape (H, W)
             indicating which flow values are valid. The flow is a numpy array of
             shape (2, H, W) and the images are PIL images. ``flow`` and ``valid_flow_mask`` are None if
             ``split="test"``.

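As the docstrings above state, these flow datasets return a 3-tuple unless a valid_flow_mask is available (built in, or produced by a transform), in which case a 4-tuple comes back. A hedged usage sketch, assuming the datasets have already been downloaded to the placeholder paths:

```python
from torchvision.datasets import KittiFlow, Sintel

# KittiFlow ships a built-in validity mask, so it always yields a 4-tuple.
kitti = KittiFlow(root="data/kitti_flow", split="train")  # placeholder path
img1, img2, flow, valid_flow_mask = kitti[0]

# Sintel has no built-in mask: a 3-tuple, unless a transform generates one.
sintel = Sintel(root="data/sintel", split="train")  # placeholder path
img1, img2, flow = sintel[0]
```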
+ 11 - 11
libs/vision_libs/datasets/_stereo_matching.py

@@ -41,12 +41,12 @@ class StereoMatchingDataset(ABC, VisionDataset):
                 For training splits generally the datasets provide a minimal guarantee of
                 images: (``PIL.Image``, ``PIL.Image``)
                 disparities: (``np.ndarray``, ``None``) with shape (1, H, W)
-                Optionally, based on the dataset, it can return a ``mask`` as well:
+                Optionally, based on the dataset, it can return a ``ins`` as well:
                 valid_masks: (``np.ndarray | None``, ``None``) with shape (H, W)
                For some test splits, the datasets provide outputs that look like:
                images: (``PIL.Image``, ``PIL.Image``)
                 disparities: (``None``, ``None``)
-                Optionally, based on the dataset, it can return a ``mask`` as well:
+                Optionally, based on the dataset, it can return a ``ins`` as well:
                 valid_masks: (``None``, ``None``)
         """
         super().__init__(root=root)
@@ -104,7 +104,7 @@ class StereoMatchingDataset(ABC, VisionDataset):
 
         Returns:
             tuple: A 3 or 4-tuple with ``(img_left, img_right, disparity, Optional[valid_mask])`` where ``valid_mask``
-                can be a numpy boolean mask of shape (H, W) if the dataset provides a file
+                can be a numpy boolean ins of shape (H, W) if the dataset provides a file
                 indicating which disparity pixels are valid. The disparity is a numpy array of
                 shape (1, H, W) and the images are PIL images. ``disparity`` is None for
                 datasets on which for ``split="test"`` the authors did not provide annotations.
@@ -278,7 +278,7 @@ class Kitti2012Stereo(StereoMatchingDataset):
             tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
             The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
             ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
-            generate a valid mask.
+            generate a valid ins.
             Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
         """
         return cast(T1, super().__getitem__(index))
@@ -366,7 +366,7 @@ class Kitti2015Stereo(StereoMatchingDataset):
             tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
             The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
             ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
-            generate a valid mask.
+            generate a valid ins.
             Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
         """
         return cast(T1, super().__getitem__(index))
@@ -573,7 +573,7 @@ class Middlebury2014Stereo(StereoMatchingDataset):
         disparity_map = _read_pfm_file(file_path)
         disparity_map = np.abs(disparity_map)  # ensure that the disparity is positive
         disparity_map[disparity_map == np.inf] = 0  # remove infinite disparities
-        valid_mask = (disparity_map > 0).squeeze(0)  # mask out invalid disparities
+        valid_mask = (disparity_map > 0).squeeze(0)  # ins out invalid disparities
         return disparity_map, valid_mask
 
     def _download_dataset(self, root: str) -> None:
@@ -712,7 +712,7 @@ class CREStereo(StereoMatchingDataset):
             tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
             The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
             ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
-            generate a valid mask.
+            generate a valid ins.
         """
         return cast(T1, super().__getitem__(index))
 
@@ -1001,7 +1001,7 @@ class SintelStereo(StereoMatchingDataset):
             self._disparities += self._scan_pairs(disparity_pattern, None)
 
     def _get_occlussion_mask_paths(self, file_path: str) -> Tuple[str, str]:
-        # helper function to get the occlusion mask paths
+        # helper function to get the occlusion ins paths
         # a path will look like  .../.../.../training/disparities/scene1/img1.png
         # we want to get something like .../.../.../training/occlusions/scene1/img1.png
         fpath = Path(file_path)
@@ -1014,10 +1014,10 @@ class SintelStereo(StereoMatchingDataset):
         outofframe_path = str(sampledir / "outofframe" / scenedir.name / basename)
 
         if not os.path.exists(occlusion_path):
-            raise FileNotFoundError(f"Occlusion mask {occlusion_path} does not exist")
+            raise FileNotFoundError(f"Occlusion ins {occlusion_path} does not exist")
 
         if not os.path.exists(outofframe_path):
-            raise FileNotFoundError(f"Out of frame mask {outofframe_path} does not exist")
+            raise FileNotFoundError(f"Out of frame ins {outofframe_path} does not exist")
 
         return occlusion_path, outofframe_path
 
@@ -1218,7 +1218,7 @@ class ETH3DStereo(StereoMatchingDataset):
             tuple: A 4-tuple with ``(img_left, img_right, disparity, valid_mask)``.
             The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
             ``valid_mask`` is implicitly ``None`` if the ``transforms`` parameter does not
-            generate a valid mask.
+            generate a valid ins.
             Both ``disparity`` and ``valid_mask`` are ``None`` if the dataset split is test.
         """
         return cast(T2, super().__getitem__(index))

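The Middlebury2014Stereo hunk above shows how these datasets derive a boolean validity mask from a disparity map; the same three lines work standalone:

```python
import numpy as np

disparity_map = np.array([[[0.5, np.inf], [-1.0, 0.0]]], dtype=np.float32)  # [1, H, W]
disparity_map = np.abs(disparity_map)        # ensure that the disparity is positive
disparity_map[disparity_map == np.inf] = 0   # remove infinite disparities
valid_mask = (disparity_map > 0).squeeze(0)  # boolean (H, W) validity mask
print(valid_mask)  # [[ True False]
                   #  [ True False]]
```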
+ 3 - 3
libs/vision_libs/models/_meta.py

@@ -648,7 +648,7 @@ _IMAGENET_CATEGORIES = [
     "manhole cover",
     "maraca",
     "marimba",
-    "mask",
+    "ins",
     "matchstick",
     "maypole",
     "maze",
@@ -696,7 +696,7 @@ _IMAGENET_CATEGORIES = [
     "oscilloscope",
     "overskirt",
     "oxcart",
-    "oxygen mask",
+    "oxygen ins",
     "packet",
     "paddle",
     "paddlewheel",
@@ -801,7 +801,7 @@ _IMAGENET_CATEGORIES = [
     "shower cap",
     "shower curtain",
     "ski",
-    "ski mask",
+    "ski ins",
     "sleeping bag",
     "slide rule",
     "sliding door",

+ 1 - 1
libs/vision_libs/models/detection/_utils.py

@@ -58,7 +58,7 @@ class BalancedPositiveNegativeSampler:
             pos_idx_per_image = positive[perm1]
             neg_idx_per_image = negative[perm2]
 
-            # create binary mask from indices
+            # create binary ins from indices
             pos_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
             neg_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
 

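The "create binary mask from indices" step above scatters the sampled positive and negative indices into zero tensors. A minimal sketch with made-up numbers:

```python
import torch

matched_idxs = torch.tensor([1, 0, 0, 1, 0, 1])  # per-anchor match labels
pos_idx = torch.tensor([0, 3])                   # sampled positive anchor indices
neg_idx = torch.tensor([1, 4])                   # sampled negative anchor indices

pos_mask = torch.zeros_like(matched_idxs, dtype=torch.uint8)
neg_mask = torch.zeros_like(matched_idxs, dtype=torch.uint8)
pos_mask[pos_idx] = 1
neg_mask[neg_idx] = 1
print(pos_mask.tolist(), neg_mask.tolist())  # [1, 0, 0, 1, 0, 0] [0, 1, 0, 0, 1, 0]
```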
+ 1 - 1
libs/vision_libs/models/detection/fcos.py

@@ -565,7 +565,7 @@ class FCOS(nn.Module):
             result (list[BoxList] or dict[Tensor]): the output from the model.
                 During training, it returns a dict[Tensor] which contains the losses.
                 During testing, it returns list[BoxList] contains additional fields
-                like `scores`, `labels` and `mask` (for Mask R-CNN models).
+                like `scores`, `labels` and `ins` (for Mask R-CNN models).
         """
         if self.training:
 

+ 1 - 1
libs/vision_libs/models/detection/generalized_rcnn.py

@@ -54,7 +54,7 @@ class GeneralizedRCNN(nn.Module):
             result (list[BoxList] or dict[Tensor]): the output from the model.
                 During training, it returns a dict[Tensor] which contains the losses.
                 During testing, it returns list[BoxList] contains additional fields
-                like `scores`, `labels` and `mask` (for Mask R-CNN models).
+                like `scores`, `labels` and `ins` (for Mask R-CNN models).
 
         """
         if self.training:

+ 6 - 6
libs/vision_libs/models/detection/mask_rcnn.py

@@ -41,7 +41,7 @@ class MaskRCNN(FasterRCNN):
         - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance
 
     The model returns a Dict[Tensor] during training, containing the classification and regression
-    losses for both the RPN and the R-CNN, and the mask loss.
+    losses for both the RPN and the R-CNN, and the ins loss.
 
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
@@ -52,7 +52,7 @@ class MaskRCNN(FasterRCNN):
        - scores (Tensor[N]): the scores of each prediction
         - masks (UInt8Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to
           obtain the final segmentation masks, the soft masks can be thresholded, generally
-          with a value of 0.5 (mask >= 0.5)
+          with a value of 0.5 (ins >= 0.5)
 
     Args:
         backbone (nn.Module): the network used to compute the features for the model.
@@ -106,10 +106,10 @@ class MaskRCNN(FasterRCNN):
         bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
             bounding boxes
         mask_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
-             the locations indicated by the bounding boxes, which will be used for the mask head.
+             the locations indicated by the bounding boxes, which will be used for the ins head.
         mask_head (nn.Module): module that takes the cropped feature maps as input
         mask_predictor (nn.Module): module that takes the output of the mask_head and returns the
-            segmentation mask logits
+            segmentation ins logits
 
     Example::
 
@@ -433,7 +433,7 @@ def maskrcnn_resnet50_fpn(
         - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for each instance
 
     The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
-    losses for both the RPN and the R-CNN, and the mask loss.
+    losses for both the RPN and the R-CNN, and the ins loss.
 
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
@@ -445,7 +445,7 @@ def maskrcnn_resnet50_fpn(
        - scores (``Tensor[N]``): the scores of each instance
         - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each instance, in ``0-1`` range. In order to
           obtain the final segmentation masks, the soft masks can be thresholded, generally
-          with a value of 0.5 (``mask >= 0.5``)
+          with a value of 0.5 (``ins >= 0.5``)
 
     For more details on the output and on how to plot the masks, you may refer to :ref:`instance_seg_output`.
 

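The docstring above describes thresholding the soft masks at 0.5 to obtain binary instance masks. A short sketch of that post-processing step on a hypothetical prediction dict:

```python
import torch

# Hypothetical inference output for one image: 4 instances, soft masks in [0, 1].
prediction = {"masks": torch.rand(4, 1, 480, 640)}

binary_masks = (prediction["masks"] >= 0.5).squeeze(1)  # bool tensor [4, 480, 640]
print(binary_masks.shape, binary_masks.dtype)
```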
+ 1 - 1
libs/vision_libs/models/detection/retinanet.py

@@ -577,7 +577,7 @@ class RetinaNet(nn.Module):
             result (list[BoxList] or dict[Tensor]): the output from the model.
                 During training, it returns a dict[Tensor] which contains the losses.
                 During testing, it returns list[BoxList] contains additional fields
-                like `scores`, `labels` and `mask` (for Mask R-CNN models).
+                like `scores`, `labels` and `ins` (for Mask R-CNN models).
 
         """
         if self.training:

+ 6 - 6
libs/vision_libs/models/detection/roi_heads.py

@@ -53,18 +53,18 @@ def maskrcnn_inference(x, labels):
     # type: (Tensor, List[Tensor]) -> List[Tensor]
     """
     From the results of the CNN, post process the masks
-    by taking the mask corresponding to the class with max
+    by taking the ins corresponding to the class with max
     probability (which are of fixed size and directly output
-    by the CNN) and return the masks in the mask field of the BoxList.
+    by the CNN) and return the masks in the ins field of the BoxList.
 
     Args:
-        x (Tensor): the mask logits
+        x (Tensor): the ins logits
         labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image
 
     Returns:
         results (list[BoxList]): one BoxList for each image, containing
-            the extra field mask
+            the extra field ins
     """
     mask_prob = x.sigmoid()
 
@@ -411,7 +411,7 @@ def paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
@@ -437,7 +437,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 

+ 2 - 2
libs/vision_libs/models/optical_flow/raft.py

@@ -311,7 +311,7 @@ class UpdateBlock(nn.Module):
 class MaskPredictor(nn.Module):
     """Mask predictor to be used when upsampling the predicted flow.
 
-    It takes the hidden state of the recurrent unit as input and outputs the mask.
+    It takes the hidden state of the recurrent unit as input and outputs the ins.
     This is not used in the raft-small model.
     """
 
@@ -464,7 +464,7 @@ class RAFT(nn.Module):
                 flow head. It takes as input the hidden state of its recurrent unit, the context, the correlation
                 features, and the current predicted flow. It outputs an updated hidden state, and the ``delta_flow``
                 prediction (see paper appendix A). It must expose a ``hidden_state_size`` attribute.
-            mask_predictor (nn.Module, optional): Predicts the mask that will be used to upsample the predicted flow.
+            mask_predictor (nn.Module, optional): Predicts the ins that will be used to upsample the predicted flow.
                 The output channel must be 8 * 8 * 9 - see paper section 3.3, and Appendix B.
                 If ``None`` (default), the flow is upsampled using interpolation.
         """

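The mask_predictor described above (output channels 8 * 8 * 9) feeds RAFT's convex upsampling: every fine-grid flow value is a softmax-weighted combination of its 3x3 coarse neighborhood. A sketch following paper section 3.3 and Appendix B; the function name and shapes are assumptions, not this repo's code:

```python
import torch
import torch.nn.functional as F

def convex_upsample_flow(flow, up_mask):
    """Upsample flow [N, 2, H, W] -> [N, 2, 8H, 8W] with predicted convex weights."""
    N, _, H, W = flow.shape
    mask = up_mask.view(N, 1, 9, 8, 8, H, W)
    mask = torch.softmax(mask, dim=2)                # convex weights over 9 neighbors

    up_flow = F.unfold(8 * flow, [3, 3], padding=1)  # 3x3 neighborhoods, flow scaled by 8
    up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)

    up_flow = torch.sum(mask * up_flow, dim=2)       # weighted combination
    up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
    return up_flow.reshape(N, 2, 8 * H, 8 * W)

out = convex_upsample_flow(torch.randn(1, 2, 8, 16), torch.randn(1, 8 * 8 * 9, 8, 16))
print(out.shape)  # torch.Size([1, 2, 64, 128])
```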
+ 1 - 1
libs/vision_libs/models/swin_transformer.py

@@ -191,7 +191,7 @@ def shifted_window_attention(
     attn = attn + relative_position_bias
 
     if sum(shift_size) > 0:
-        # generate attention mask
+        # generate attention ins
         attn_mask = x.new_zeros((pad_H, pad_W))
         h_slices = ((0, -window_size[0]), (-window_size[0], -shift_size[0]), (-shift_size[0], None))
         w_slices = ((0, -window_size[1]), (-window_size[1], -shift_size[1]), (-shift_size[1], None))

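The "generate attention mask" code above labels each pixel with the shifted-window region it came from, then forbids attention between tokens whose labels differ. A condensed sketch of the idea (window and shift sizes are example values):

```python
import torch

pad_H = pad_W = 8
window, shift = 4, 2
h_slices = ((0, -window), (-window, -shift), (-shift, None))
w_slices = h_slices

# Label each pixel with its post-shift region.
region = torch.zeros((pad_H, pad_W))
count = 0
for h in h_slices:
    for w in w_slices:
        region[h[0]:h[1], w[0]:w[1]] = count
        count += 1

# Tokens in one window may only attend to tokens from the same region.
win = region[-window:, -window:].reshape(-1)  # bottom-right window mixes regions
attn_mask = (win[None, :] != win[:, None]).float() * -100.0
print(attn_mask.shape)  # torch.Size([16, 16]); -100 blocks cross-region attention
```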
+ 2 - 2
libs/vision_libs/models/video/swin_transformer.py

@@ -74,7 +74,7 @@ def _compute_attention_mask_3d(
     window_size: Tuple[int, int, int],
     shift_size: Tuple[int, int, int],
 ) -> Tensor:
-    # generate attention mask
+    # generate attention ins
     attn_mask = x.new_zeros(*size_dhw)
     num_windows = (size_dhw[0] // window_size[0]) * (size_dhw[1] // window_size[1]) * (size_dhw[2] // window_size[2])
     slices = [
@@ -184,7 +184,7 @@ def shifted_window_attention_3d(
     attn = attn + relative_position_bias
 
     if sum(shift_size) > 0:
-        # generate attention mask to handle shifted windows with varying size
+        # generate attention ins to handle shifted windows with varying size
         attn_mask = _compute_attention_mask_3d(
             x,
             (padded_size[0], padded_size[1], padded_size[2]),

+ 5 - 5
libs/vision_libs/ops/deform_conv.py

@@ -24,10 +24,10 @@ def deform_conv2d(
     r"""
     Performs Deformable Convolution v2, described in
     `Deformable ConvNets v2: More Deformable, Better Results
-    <https://arxiv.org/abs/1811.11168>`__ if :attr:`mask` is not ``None`` and
+    <https://arxiv.org/abs/1811.11168>`__ if :attr:`ins` is not ``None`` and
     Performs Deformable Convolution, described in
     `Deformable Convolutional Networks
-    <https://arxiv.org/abs/1703.06211>`__ if :attr:`mask` is ``None``.
+    <https://arxiv.org/abs/1703.06211>`__ if :attr:`ins` is ``None``.
 
     Args:
         input (Tensor[batch_size, in_channels, in_height, in_width]): input tensor
@@ -50,12 +50,12 @@ def deform_conv2d(
         >>> input = torch.rand(4, 3, 10, 10)
         >>> kh, kw = 3, 3
         >>> weight = torch.rand(5, 3, kh, kw)
-        >>> # offset and mask should have the same spatial size as the output
+        >>> # offset and ins should have the same spatial size as the output
         >>> # of the convolution. In this case, for an input of 10, stride of 1
         >>> # and kernel size of 3, without padding, the output size is 8
         >>> offset = torch.rand(4, 2 * kh * kw, 8, 8)
-        >>> mask = torch.rand(4, kh * kw, 8, 8)
-        >>> out = deform_conv2d(input, offset, weight, mask=mask)
+        >>> ins = torch.rand(4, kh * kw, 8, 8)
+        >>> out = deform_conv2d(input, offset, weight, ins=ins)
         >>> print(out.shape)
         >>> # returns
         >>>  torch.Size([4, 5, 8, 8])

+ 1 - 1
libs/vision_libs/ops/roi_align.py

@@ -146,7 +146,7 @@ def _roi_align(input, rois, spatial_scale, pooled_height, pooled_width, sampling
         # is data-dependent based on how big the ROIs are.  This is a bit
         # awkward because first-class dims can't actually handle this.
         # So instead, we inefficiently suppose that we needed to sample ALL
-        # the points and mask out things that turned out to be unnecessary
+        # the points and ins out things that turned out to be unnecessary
         iy = torch.arange(height, device=input.device)  # [IY]
         ix = torch.arange(width, device=input.device)  # [IX]
         ymask = iy[None, :] < roi_bin_grid_h[:, None]  # [K, IY]

+ 1 - 1
libs/vision_libs/transforms/_functional_tensor.py

@@ -550,7 +550,7 @@ def _apply_grid_transform(
         # Apply same grid to a batch of images
         grid = grid.expand(img.shape[0], grid.shape[1], grid.shape[2], grid.shape[3])
 
-    # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice
+    # Append a dummy ins for customized fill colors, should be faster than grid_sample() twice
     if fill is not None:
         mask = torch.ones((img.shape[0], 1, img.shape[2], img.shape[3]), dtype=img.dtype, device=img.device)
         img = torch.cat((img, mask), dim=1)

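The "dummy mask" trick above warps an all-ones channel together with the image; where the warped ones-channel drops below 1, the sample came from outside the image, and the fill color is composited in via the identity quoted in the _geometry.py hunk below (mask * (img - fill) + fill). A minimal sketch, assuming a bilinear warp over an identity grid:

```python
import torch
import torch.nn.functional as F

img = torch.rand(1, 3, 32, 32)
fill = torch.tensor([255.0, 0.0, 0.0]).view(1, 3, 1, 1)  # example fill color

# Identity grid standing in for an arbitrary affine/perspective grid.
grid = F.affine_grid(torch.eye(2, 3).unsqueeze(0), size=(1, 3, 32, 32), align_corners=False)

ones = torch.ones((1, 1, 32, 32))
warped = F.grid_sample(torch.cat((img, ones), dim=1), grid,
                       mode="bilinear", padding_mode="zeros", align_corners=False)
out, mask = warped[:, :3], warped[:, 3:]
out = out.sub_(fill).mul_(mask).add_(fill)  # mask * (img - fill) + fill
```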
+ 1 - 1
libs/vision_libs/transforms/v2/_utils.py

@@ -191,7 +191,7 @@ def query_size(flat_inputs: List[Any]) -> Tuple[int, int]:
         )
     }
     if not sizes:
-        raise TypeError("No image, video, mask or bounding box was found in the sample")
+        raise TypeError("No image, video, ins or bounding box was found in the sample")
     elif len(sizes) > 1:
         raise ValueError(f"Found multiple HxW dimensions in the sample: {sequence_to_str(sorted(sizes))}")
     h, w = sizes.pop()

+ 2 - 2
libs/vision_libs/transforms/v2/functional/_geometry.py

@@ -579,7 +579,7 @@ def _apply_grid_transform(img: torch.Tensor, grid: torch.Tensor, mode: str, fill
         # Apply same grid to a batch of images
         grid = grid.expand(squashed_batch_size, -1, -1, -1)
 
-    # Append a dummy mask for customized fill colors, should be faster than grid_sample() twice
+    # Append a dummy ins for customized fill colors, should be faster than grid_sample() twice
     if fill is not None:
         mask = torch.ones(
             (squashed_batch_size, 1, input_height, input_width), dtype=float_img.dtype, device=float_img.device
@@ -599,7 +599,7 @@ def _apply_grid_transform(img: torch.Tensor, grid: torch.Tensor, mode: str, fill
             float_img[bool_mask] = fill_img.expand_as(float_img)[bool_mask]
         else:  # 'bilinear'
             # The following is mathematically equivalent to:
-            # img * mask + (1.0 - mask) * fill = img * mask - fill * mask + fill = mask * (img - fill) + fill
+            # img * ins + (1.0 - ins) * fill = img * ins - fill * ins + fill = ins * (img - fill) + fill
             float_img = float_img.sub_(fill_img).mul_(mask).add_(fill_img)
 
     img = float_img.round_().to(img.dtype) if not fp else float_img

+ 1 - 1
libs/vision_libs/tv_tensors/_dataset_wrapper.py

@@ -65,7 +65,7 @@ def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
 
         Segmentation datasets, e.g. :class:`~torchvision.datasets.VOCSegmentation`, return a two-tuple of
         :class:`PIL.Image.Image`'s. This wrapper leaves the image as is (first item), while wrapping the
-        segmentation mask into a :class:`~torchvision.tv_tensors.Mask` (second item).
+        segmentation ins into a :class:`~torchvision.tv_tensors.Mask` (second item).
 
     Video classification datasets
 

+ 1 - 1
libs/vision_libs/tv_tensors/_mask.py

@@ -17,7 +17,7 @@ class Mask(TVTensor):
         dtype (torch.dtype, optional): Desired data type. If omitted, will be inferred from
             ``data``.
         device (torch.device, optional): Desired device. If omitted and ``data`` is a
-            :class:`torch.Tensor`, the device is taken from it. Otherwise, the mask is constructed on the CPU.
+            :class:`torch.Tensor`, the device is taken from it. Otherwise, the ins is constructed on the CPU.
         requires_grad (bool, optional): Whether autograd should record operations. If omitted and
             ``data`` is a :class:`torch.Tensor`, the value is taken from it. Otherwise, defaults to ``False``.
     """

+ 2 - 2
libs/vision_libs/utils.py

@@ -272,7 +272,7 @@ def draw_segmentation_masks(
         colors (color or list of colors, optional): List containing the colors
             of the masks or single color for all masks. The color can be represented as
             PIL strings e.g. "red" or "#FF00FF", or as RGB tuples e.g. ``(240, 10, 157)``.
-            By default, random colors are generated for each mask.
+            By default, random colors are generated for each ins.
 
     Returns:
         img (Tensor[C, H, W]): Image Tensor, with segmentation masks drawn on top.
@@ -300,7 +300,7 @@ def draw_segmentation_masks(
     num_masks = masks.size()[0]
 
     if num_masks == 0:
-        warnings.warn("masks doesn't contain any mask. No mask was drawn")
+        warnings.warn("masks doesn't contain any ins. No ins was drawn")
         return image
 
     out_dtype = torch.uint8

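A brief usage sketch for `draw_segmentation_masks` as documented above (image and masks are synthetic):

```python
import torch
from torchvision.utils import draw_segmentation_masks

image = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)
masks = torch.zeros(2, 64, 64, dtype=torch.bool)  # two instance masks
masks[0, 10:30, 10:30] = True
masks[1, 35:55, 35:55] = True

out = draw_segmentation_masks(image, masks, alpha=0.6, colors=["red", "#00FF00"])
print(out.shape, out.dtype)  # torch.Size([3, 64, 64]) torch.uint8
```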
+ 1 - 1
models/base/base_detection_net.py

@@ -64,7 +64,7 @@ class BaseDetectionNet(BaseModel):
             result (list[BoxList] or dict[Tensor]): the output from the model.
                 During training, it returns a dict[Tensor] which contains the losses.
                 During testing, it returns list[BoxList] contains additional fields
-                like `scores`, `labels` and `mask` (for Mask R-CNN models).
+                like `scores`, `labels` and `ins` (for Mask R-CNN models).
 
         """
 

+ 1 - 1
models/base/high_reso_swin.py

@@ -200,7 +200,7 @@ def shifted_window_attention(
     attn = attn + relative_position_bias
 
     if sum(shift_size) > 0:
-        # generate attention mask
+        # generate attention ins
         attn_mask = x.new_zeros((pad_H, pad_W))
         h_slices = ((0, -window_size[0]), (-window_size[0], -shift_size[0]), (-shift_size[0], None))
         w_slices = ((0, -window_size[1]), (-window_size[1], -shift_size[1]), (-shift_size[1], None))

+ 3 - 3
models/dataset_tool.py

@@ -287,7 +287,7 @@ def read_masks_from_pixels_wire(lbl_path, shape):
         lines = json.load(reader)
         mask_points = []
         for line in lines["segmentations"]:
-            # mask = torch.zeros((h, w), dtype=torch.uint8)
+            # ins = torch.zeros((h, w), dtype=torch.uint8)
             # parts = line["data"]
             # print(f'parts:{parts}')
             cls = torch.tensor(int(line["cls_id"]), dtype=torch.int64)
@@ -301,8 +301,8 @@ def read_masks_from_pixels_wire(lbl_path, shape):
             #     mask_points.append((int(y * h), int(x * w)))
 
             # for p in mask_points:
-            #     mask[p] = 1
-            # masks.append(mask)
+            #     ins[p] = 1
+            # masks.append(ins)
     reader.close()
     return labels
 

+ 2 - 2
models/ins/maskrcnn.py

@@ -70,7 +70,7 @@ class MaskRCNNModel(nn.Module):
         print(f'result:{result}')
         masks = result[0]['masks']
         boxes = result[0]['boxes']
-        # cv2.imshow('mask',masks[0].cpu().detach().numpy())
+        # cv2.imshow('ins',masks[0].cpu().detach().numpy())
         boxes = boxes.cpu().detach()
         drawn_boxes = draw_bounding_boxes((img * 255).to(torch.uint8), boxes, colors="red", width=5)
         print(f'drawn_boxes:{drawn_boxes.shape}')
@@ -81,7 +81,7 @@ class MaskRCNNModel(nn.Module):
         mask = masks[0].cpu().detach().permute(1, 2, 0).numpy()
 
         mask = cv2.resize(mask, (800, 800))
-        # cv2.imshow('mask',mask)
+        # cv2.imshow('ins',ins)
         img = img.cpu().detach().permute(1, 2, 0).numpy()
 
         masked_img = self.overlay_masks_on_image(boxed_img, masks)

+ 6 - 6
models/ins_detect/mask_rcnn.py

@@ -41,7 +41,7 @@ class InsDetectNet(FasterRCNN):
         - masks (UInt8Tensor[N, H, W]): the segmentation binary masks for each instance
 
     The model returns a Dict[Tensor] during training, containing the classification and regression
-    losses for both the RPN and the R-CNN, and the mask loss.
+    losses for both the RPN and the R-CNN, and the ins loss.
 
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a List[Dict[Tensor]], one for each input image. The fields of the Dict are as
@@ -52,7 +52,7 @@ class InsDetectNet(FasterRCNN):
        - scores (Tensor[N]): the scores of each prediction
         - masks (UInt8Tensor[N, 1, H, W]): the predicted masks for each instance, in 0-1 range. In order to
           obtain the final segmentation masks, the soft masks can be thresholded, generally
-          with a value of 0.5 (mask >= 0.5)
+          with a value of 0.5 (ins >= 0.5)
 
     Args:
         backbone (nn.Module): the network used to compute the features for the model.
@@ -106,10 +106,10 @@ class InsDetectNet(FasterRCNN):
         bbox_reg_weights (Tuple[float, float, float, float]): weights for the encoding/decoding of the
             bounding boxes
         mask_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
-             the locations indicated by the bounding boxes, which will be used for the mask head.
+             the locations indicated by the bounding boxes, which will be used for the ins head.
         mask_head (nn.Module): module that takes the cropped feature maps as input
         mask_predictor (nn.Module): module that takes the output of the mask_head and returns the
-            segmentation mask logits
+            segmentation ins logits
 
     Example::
 
@@ -433,7 +433,7 @@ def maskrcnn_resnet50_fpn(
         - masks (``UInt8Tensor[N, H, W]``): the segmentation binary masks for each instance
 
     The model returns a ``Dict[Tensor]`` during training, containing the classification and regression
-    losses for both the RPN and the R-CNN, and the mask loss.
+    losses for both the RPN and the R-CNN, and the ins loss.
 
     During inference, the model requires only the input tensors, and returns the post-processed
     predictions as a ``List[Dict[Tensor]]``, one for each input image. The fields of the ``Dict`` are as
@@ -445,7 +445,7 @@ def maskrcnn_resnet50_fpn(
        - scores (``Tensor[N]``): the scores of each instance
         - masks (``UInt8Tensor[N, 1, H, W]``): the predicted masks for each instance, in ``0-1`` range. In order to
           obtain the final segmentation masks, the soft masks can be thresholded, generally
-          with a value of 0.5 (``mask >= 0.5``)
+          with a value of 0.5 (``ins >= 0.5``)
 
     For more details on the output and on how to plot the masks, you may refer to :ref:`instance_seg_output`.
 

+ 2 - 2
models/ins_detect/maskrcnn.py

@@ -68,7 +68,7 @@ class MaskRCNNModel(nn.Module):
         print(f'result:{result}')
         masks = result[0]['masks']
         boxes = result[0]['boxes']
-        # cv2.imshow('mask',masks[0].cpu().detach().numpy())
+        # cv2.imshow('ins',masks[0].cpu().detach().numpy())
         boxes = boxes.cpu().detach()
         drawn_boxes = draw_bounding_boxes((img * 255).to(torch.uint8), boxes, colors="red", width=5)
         print(f'drawn_boxes:{drawn_boxes.shape}')
@@ -79,7 +79,7 @@ class MaskRCNNModel(nn.Module):
         mask = masks[0].cpu().detach().permute(1, 2, 0).numpy()
 
         mask = cv2.resize(mask, (800, 800))
-        # cv2.imshow('mask',mask)
+        # cv2.imshow('ins',ins)
         img = img.cpu().detach().permute(1, 2, 0).numpy()
 
         masked_img = self.overlay_masks_on_image(boxed_img, masks)

+ 18 - 18
models/ins_detect/test_datasets.py

@@ -137,8 +137,8 @@ def trans_datasets_format():
 
                 # non_zero_coords = np.nonzero(inm.reshape(width,height).T)
                 # coords_list = list(zip(non_zero_coords[0], non_zero_coords[1]))
-                # # print(f'mask:{mask[0,333]}')
-                # print(f'mask pixels:{coords_list}')
+                # # print(f'ins:{ins[0,333]}')
+                # print(f'ins pixels:{coords_list}')
                 #
                 #
                 # for coord in coords_list:
@@ -150,7 +150,7 @@ def trans_datasets_format():
         writer.close()
         print(f'label:{label}')
         # plt.imshow(img)
-        # plt.imshow(mask, cmap='Reds', alpha=0.3)
+        # plt.imshow(ins, cmap='Reds', alpha=0.3)
         # plt.show()
 
 
@@ -168,17 +168,17 @@ def compute_mask(row, shape):
         mask[s:s + l] = 255
     mask = mask.reshape((width, height)).T
 
-    # mask = np.flipud(np.rot90(mask.reshape((height, width))))
+    # ins = np.flipud(np.rot90(ins.reshape((height, width))))
     return label, mask
 
 def cluster_dbscan(mask,image):
-    # Convert the mask to a binary image
+    # Convert the ins to a binary image
     _, mask_binary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
 
-    # Flatten the mask to one dimension
+    # Flatten the ins to one dimension
     mask_flattened = mask_binary.flatten()
 
-    # Get the foreground pixel coordinates from the mask
+    # Get the foreground pixel coordinates from the ins
     foreground_pixels = np.argwhere(mask_flattened == 255)
 
     # Convert pixel coordinates to 2D coordinates
@@ -219,14 +219,14 @@ def cluster_dbscan(mask,image):
     return unique_labels,clustered_points
 
 def show_cluster_dbscan(mask,image,unique_labels,clustered_points,):
-    print(f'mask shape:{mask.shape}')
-    # Convert the mask to a binary image
+    print(f'ins shape:{mask.shape}')
+    # Convert the ins to a binary image
     _, mask_binary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
 
-    # Flatten the mask to one dimension
+    # Flatten the ins to one dimension
     mask_flattened = mask_binary.flatten()
 
-    # Get the foreground pixel coordinates from the mask
+    # Get the foreground pixel coordinates from the ins
     foreground_pixels = np.argwhere(mask_flattened == 255)
     # print(f'unique_labels:{unique_labels}')
     # Create an empty image to store the clustering results
@@ -326,11 +326,11 @@ def test_mask():
             # points = np.array([[float(parts[i]), float(parts[i + 1])] for i in range(1, len(parts), 2)])
             # mask_resized = cv2.resize(points, (1600, 256), interpolation=cv2.INTER_NEAREST)
             print(f'points:{points}')
-            # mask[points[:,0],points[:,1]]=255
+            # ins[points[:,0],points[:,1]]=255
             for p in points:
                 mask[p] = 255
-            # cv2.fillPoly(mask, points, color=(255,))
-    cv2.imshow('mask', mask)
+            # cv2.fillPoly(ins, points, color=(255,))
+    cv2.imshow('ins', mask)
     for row in df.itertuples():
         img_name = name + '.jpg'
         if img_name == getattr(row, 'ImageId'):
@@ -391,7 +391,7 @@ def show_img_mask(img_path):
         # mask_3channel = cv2.merge([np.zeros_like(img_mask), np.zeros_like(img_mask), img_mask])
         # masked_image = cv2.addWeighted(test_img, 1, mask_3channel, 0.6, 0)
 
-    # cv2.imshow('cv2 mask img', masked_image)
+    # cv2.imshow('cv2 ins img', masked_image)
     # cv2.waitKey(0)
     plt.show()
 
@@ -459,7 +459,7 @@ def show_dataset():
         masks=targets[0]['masks']
         boxes = targets[0]['boxes']
         print(f'boxes:{boxes}')
-        # mask[mask == 255] = 1
+        # ins[ins == 255] = 1
         # img = np.array(imgs[2].permute(1, 2, 0)) * 255
         show_boxes_masks( imgs, boxes,masks)
 
@@ -471,14 +471,14 @@ def show_boxes_masks(imgs, boxes,masks):
     print(f'masks shape:{masks.shape}')
     print(f'img shape:{img.shape}')
     print(f'img shape:{img.shape}')
-    # print(f'mask:{mask.shape}')
+    # print(f'ins:{ins.shape}')
     # mask_3channel = cv2.merge([np.zeros_like(masks[0]), np.zeros_like(masks[0]), masks[0]])
     # print(f'mask_3channel:{mask_3channel.shape}')
     img_tensor = torch.tensor(imgs[0], dtype=torch.uint8)
     boxed_img = draw_bounding_boxes(img_tensor, boxes).permute(1, 2, 0).contiguous()
     masked_img = draw_segmentation_masks(img_tensor, masks).permute(1, 2, 0).contiguous()
     plt.imshow(imgs[0].permute(1, 2, 0))
-    # plt.imshow(mask, cmap='Reds', alpha=0.5)
+    # plt.imshow(ins, cmap='Reds', alpha=0.5)
     plt.imshow(masked_img, cmap='Reds', alpha=0.3)
     plt.imshow(boxed_img, cmap='Greens', alpha=0.5)
     plt.show()

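`cluster_dbscan` above thresholds the mask, collects foreground pixel coordinates, and clusters them. A self-contained sketch of the same pipeline with scikit-learn (eps and min_samples are placeholder values):

```python
import numpy as np
from sklearn.cluster import DBSCAN

mask = np.zeros((64, 64), dtype=np.uint8)
mask[5:15, 5:15] = 255    # blob 1
mask[40:50, 40:50] = 255  # blob 2

coords = np.argwhere(mask == 255)  # (row, col) coordinates of foreground pixels
labels = DBSCAN(eps=2.0, min_samples=4).fit_predict(coords)
print(np.unique(labels))  # e.g. [0 1]: one cluster per blob; -1 would mark noise
```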
+ 1 - 1
models/ins_detect/trainer.py

@@ -303,7 +303,7 @@ def write_val_imgs(epoch, img, results, writer):
     boxes = results[0]['boxes']
     print(f'boxes shape:{boxes.shape}')
     print(f'writer img shape:{img.shape}')
-    # cv2.imshow('mask',masks[0].cpu().detach().numpy())
+    # cv2.imshow('ins',masks[0].cpu().detach().numpy())
     boxes = boxes.cpu().detach()
     drawn_boxes = draw_bounding_boxes((img * 255).to(torch.uint8), boxes, colors="red", width=5)
     print(f'drawn_boxes:{drawn_boxes.shape}')

+ 12 - 12
models/line_detect/heads/head_losses.py

@@ -474,20 +474,20 @@ def find_max_heat_point_in_each_part(feature_map, box):
     mask_q3 = (y_coords >= new_cy) & (x_coords < new_cx)  # bottom-left
     mask_q4 = (y_coords >= new_cy) & (x_coords >= new_cx)  # bottom-right
 
-    # def process_region(mask):
-    #     region = feature_map[:, :, mask].squeeze()
+    # def process_region(ins):
+    #     region = feature_map[:, :, ins].squeeze()
     #     if len(region.shape) == 0:  # skip if the region is empty
     #         return None, None
     #     # find the point with the maximum heat value and its position
     #     (y, x), heat_val = non_maximum_suppression(region[0])
     #     # convert relative coordinates back to global coordinates
-    #     y_global = y + torch.where(mask)[0].min().item()
-    #     x_global = x + torch.where(mask)[1].min().item()
+    #     y_global = y + torch.where(ins)[0].min().item()
+    #     x_global = x + torch.where(ins)[1].min().item()
     #     return (y_global, x_global), heat_val
     #
     # results = []
-    # for mask in [mask_q1, mask_q2, mask_q3, mask_q4]:
-    #     point, heat_val = process_region(mask)
+    # for ins in [mask_q1, mask_q2, mask_q3, mask_q4]:
+    #     point, heat_val = process_region(ins)
     #     if point is not None:
     #         # results.append((point[0], point[1], heat_val))
     #         results.append((point[0], point[1]))
@@ -500,7 +500,7 @@ def find_max_heat_point_in_each_part(feature_map, box):
     heatmap = feature_map[0]  # [H, W]
 
     def process_region(mask):
-        # Apply the mask, keeping only this region
+        # Apply the ins, keeping only this region
         masked_heatmap = heatmap.clone()  # copy to avoid modifying the original data
         masked_heatmap[~mask] = 0  # zero out everything outside the region
 
@@ -513,7 +513,7 @@ def find_max_heat_point_in_each_part(feature_map, box):
                kernel_size (int): pooling window size used to test for local maxima
 
             Returns:
-                torch.Tensor: a mask of the same shape as heatmap, True at local maxima
+                torch.Tensor: an ins of the same shape as heatmap, True at local maxima
             """
             pad = (kernel_size - 1) // 2
             max_pool = torch.nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=pad)
@@ -575,7 +575,7 @@ def find_max_heat_point_in_edge_centers(feature_map, box):
         indexing='ij'
     )
 
-    # Define the mask for each of the four "edge-center" regions
+    # Define the ins for each of the four "edge-center" regions
+    # ¶¨ÒåËĸö¡°±ßÖС±ÇøÓòµÄ ins
     mask1 = (x_coords < x_left) & (y_coords < y_top)
     mask_top_middle    = (x_coords >= x_left) & (x_coords < x_right) & (y_coords < y_top)
     mask3 = (x_coords >= x_right) & (y_coords < y_top)
@@ -831,7 +831,7 @@ def compute_mask_loss(feature_logits, proposals, gt_, pos_matched_idxs):
         print(f'gs_heatmaps:{gs_heatmaps.shape}, line_logits.shape:{feature_logits.shape}')
 
         line_logits = feature_logits.squeeze(1)
-        print(f'mask shape:{line_logits.shape}')
+        print(f'ins shape:{line_logits.shape}')
 
         # line_loss = F.binary_cross_entropy_with_logits(line_logits, gs_heatmaps)
 
@@ -1201,8 +1201,8 @@ def heatmaps_to_arc(maps, rois, threshold=0.5, output_size=(128, 128)):
         output_size: resized size for uniform NMS
 
     Returns:
-        masks: [N, 1, H, W] - binary mask aligned with input map
-        scores: [N, 1] - count of non-zero pixels in each mask
+        masks: [N, 1, H, W] - binary ins aligned with input map
+        scores: [N, 1] - count of non-zero pixels in each ins
     """
     N, _, H, W = maps.shape
     masks = torch.zeros((N, 1, H, W), dtype=torch.float32, device=maps.device)

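The `non_maximum_suppression` helper documented in the hunk above finds local maxima by comparing the heatmap against its max-pooled version. A standalone sketch of that technique:

```python
import torch
import torch.nn.functional as F

def local_maxima_mask(heatmap, kernel_size=3):
    """Boolean mask of the same shape as heatmap, True at local maxima."""
    pad = (kernel_size - 1) // 2
    pooled = F.max_pool2d(heatmap[None, None], kernel_size, stride=1, padding=pad)[0, 0]
    return (heatmap == pooled) & (heatmap > 0)

heatmap = torch.zeros(32, 32)
heatmap[8, 8], heatmap[20, 25] = 0.9, 0.7
print(local_maxima_mask(heatmap).nonzero().tolist())  # [[8, 8], [20, 25]]
```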
+ 0 - 0
models/line_detect/heads/ins/__init__.py


+ 15 - 0
models/line_detect/heads/ins/ins_forward.py

@@ -0,0 +1,15 @@
+from models.line_detect.heads.head_losses import features_align
+
+
+def ins_forward(model, features, image_shapes, proposals):
+    # print(f'circle_proposals:{len(proposals)}')
+
+    cs_features = features
+
+
+    feature_logits = model.ins_decoder(cs_features)
+    print(f'feature_logits from ins_decoder:{feature_logits.shape}')
+    roi_features = features_align(feature_logits, proposals, image_shapes)
+    if roi_features is not None:
+        print(f'roi_features from align:{roi_features.shape}')
+    return roi_features

+ 121 - 0
models/line_detect/heads/ins/ins_losses.py

@@ -0,0 +1,121 @@
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+class DiceLoss(nn.Module):
+    def __init__(self, smooth=1.):
+        super(DiceLoss, self).__init__()
+        self.smooth = smooth
+
+    def forward(self, logits, targets):
+        probs = torch.sigmoid(logits)
+        probs = probs.view(-1)
+        targets = targets.view(-1).float()
+
+        intersection = (probs * targets).sum()
+        dice = (2. * intersection + self.smooth) / (probs.sum() + targets.sum() + self.smooth)
+        return 1. - dice
+
+bce_loss = nn.BCEWithLogitsLoss()
+dice_loss = DiceLoss()
+
+def combined_loss(preds, targets, alpha=0.5):
+    bce = bce_loss(preds, targets)
+    d = dice_loss(preds, targets)
+    return alpha * bce + (1 - alpha) * d
+
+def align_masks(keypoints, rois, heatmap_size):
+    print(f'rois:{rois.shape}')
+    print(f'heatmap_size:{heatmap_size}')
+
+    print(f'keypoints.shape:{keypoints.shape}')
+    # batch_size, num_keypoints, _ = keypoints.shape
+    t_h, t_w = keypoints.shape[-2:]
+    scale=heatmap_size/t_w
+    print(f'scale:{scale}')
+    x = keypoints[..., 0]*scale
+    y = keypoints[..., 1]*scale
+
+    x = x.unsqueeze(1)
+    y = y.unsqueeze(1)
+
+    num_points=x.shape[2]
+    print(f'num_points:{num_points}')
+    mask_4d = keypoints.unsqueeze(1).float()
+    resized_mask = F.interpolate(
+        mask_4d,
+        size=(heatmap_size, heatmap_size),
+        mode='bilinear',
+        align_corners=False,
+    ).squeeze(1)  # [B, heatmap_size, heatmap_size]
+    # plt.imshow(resized_mask[0].cpu())
+    # plt.show()
+    print(f'resized_mask:{resized_mask.shape}')
+    return resized_mask
+
+
+
+
+def compute_ins_loss(feature_logits, proposals, gt_, pos_matched_idxs):
+    print(f'compute_ins_loss:{feature_logits.shape}')
+    N, K, H, W = feature_logits.shape
+
+    len_proposals = len(proposals)
+
+    empty_count = 0
+    non_empty_count = 0
+
+    for prop in proposals:
+        if prop.shape[0] == 0:
+            empty_count += 1
+        else:
+            non_empty_count += 1
+
+    print(f"Empty proposals count: {empty_count}")
+    print(f"Non-empty proposals count: {non_empty_count}")
+
+    print(f'start to compute_point_loss')
+    print(f'compute_point_loss line_logits.shape:{feature_logits.shape},len_proposals:{len_proposals}')
+    if H != W:
+        raise ValueError(
+            f"line_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
+        )
+    discretization_size = H
+
+    gs_heatmaps = []
+    # print(f'point_matched_idxs:{point_matched_idxs}')
+    print(f'gt_masks:{gt_[0].shape}')
+    for proposals_per_image, gt_kp_in_image, midx in zip(proposals, gt_, pos_matched_idxs):
+        # [
+        #   (Tensor(38, 4), Tensor(1, 57, 2), Tensor(38, 1)),
+        #   (Tensor(65, 4), Tensor(1, 74, 2), Tensor(65, 1))
+        # ]
+        print(f'proposals_per_image:{proposals_per_image.shape}')
+        kp = gt_kp_in_image[midx]
+        t_h, t_w = kp.shape[-2:]
+        print(f't_h:{t_h}, t_w:{t_w}')
+
+        print(f'gt_kp_in_image:{gt_kp_in_image.shape}')
+        if proposals_per_image.shape[0] > 0 and gt_kp_in_image.shape[0] > 0:
+
+            gs_heatmaps_per_img = align_masks(kp, proposals_per_image, discretization_size)
+            gs_heatmaps.append(gs_heatmaps_per_img)
+
+    if len(gs_heatmaps)>0:
+        gs_heatmaps = torch.cat(gs_heatmaps, dim=0)
+        print(f'gs_heatmaps:{gs_heatmaps.shape}, line_logits.shape:{feature_logits.shape}')
+
+        line_logits = feature_logits.squeeze(1)
+        print(f'mask shape:{line_logits.shape}')
+
+        # line_loss = F.binary_cross_entropy_with_logits(line_logits, gs_heatmaps)
+
+        # line_loss = F.cross_entropy(line_logits, gs_heatmaps)
+        line_loss=combined_loss(line_logits, gs_heatmaps)
+
+    else:
+        line_loss=100
+
+    print("d")
+
+    return line_loss

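The new `combined_loss` above blends BCE-with-logits and Dice. A quick usage sketch on random tensors; the import path assumes the file is reachable from the package root:

```python
import torch

from models.line_detect.heads.ins.ins_losses import combined_loss  # assumed path

logits = torch.randn(4, 128, 128)                  # raw predictions
targets = (torch.rand(4, 128, 128) > 0.5).float()  # binary ground truth
loss = combined_loss(logits, targets, alpha=0.5)   # 0.5 * BCE + 0.5 * Dice
print(loss.item())
```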
+ 51 - 0
models/line_detect/heads/ins/ins_predictor.py

@@ -0,0 +1,51 @@
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+
+class ArcEquationPredictor(nn.Module):
+    def __init__(self, h=512, w=672, num_outputs=7):
+        super().__init__()
+        self.h = h
+        self.w = w
+        self.num_outputs = num_outputs
+        # Fully connected layer to map flattened feature map to arc parameters
+        self.fc = nn.Linear(h * w, num_outputs)
+
+    def forward(self, feature_logits, arc_pos_matched_idxs):
+        """
+        Args:
+            feature_logits (Tensor): shape [total_num_boxes, 1, H, W],
+                                     contains all proposals from all images in the batch.
+            arc_pos_matched_idxs (list[Tensor]): list of length B,
+                                     only used for reference, not used for splitting.
+        Returns:
+            arc_params (Tensor): shape [total_num_boxes, num_outputs],
+                                 predicted arc parameters for all proposals.
+        """
+
+        assert feature_logits.dim() == 4 and feature_logits.shape[1] == 1, \
+            f"Expected [total_num_boxes, 1, H, W], got {feature_logits.shape}"
+
+        total_num_boxes, _, H, W = feature_logits.shape
+
+        # Flatten spatial dimensions
+        x = feature_logits.view(total_num_boxes, -1)  # [total_num_boxes, H*W]
+
+        # Predict arc parameters for each proposal
+        arc_params = self.fc(x)  # [total_num_boxes, num_outputs]
+
+        # Map raw outputs into valid ranges
+        arc_params[..., 0] = torch.sigmoid(arc_params[..., 0]) * self.w  # cx
+        arc_params[..., 1] = torch.sigmoid(arc_params[..., 1]) * self.h  # cy
+        arc_params[..., 2] = F.relu(arc_params[..., 2])  # long_axis
+        arc_params[..., 3] = F.relu(arc_params[..., 3])  # short_axis
+        arc_params[..., 4] = torch.sigmoid(arc_params[..., 4]) * 2 * 3.1415926  # θ1
+        arc_params[..., 5] = torch.sigmoid(arc_params[..., 5]) * 2 * 3.1415926  # θ2
+        arc_params[..., 6] = torch.sigmoid(arc_params[..., 6]) * 2 * 3.1415926  # θ3
+
+        # Directly return all predictions together
+        return arc_params

+ 24 - 22
models/line_detect/line_detect.py

@@ -14,6 +14,7 @@ from models.line_detect.heads.circle.circle_heads import CircleHeads, CirclePred
 from .heads.decoder import FPNDecoder
 from models.line_detect.heads.line.line_heads import LinePredictor
 from models.line_detect.heads.point.point_heads import PointHeads, PointPredictor
+from .heads.ins.ins_predictor import ArcEquationPredictor
 from .loi_heads import RoIHeads
 
 from .trainer import Trainer
@@ -86,8 +87,8 @@ class LineDetect(BaseDetectionNet):
             point_head=None,
             point_predictor=None,
 
-            circle_head=None,
-            circle_predictor=None,
+            ins_head=None,
+            ins_predictor=None,
             circle_roi_pool=None,
 
     # arc parameters
@@ -98,7 +99,7 @@ class LineDetect(BaseDetectionNet):
             detect_point=False,
             detect_line=False,
             detect_arc=True,
-            detect_circle=False,
+            detect_ins=False,
             **kwargs,
 
     ):
@@ -154,7 +155,7 @@ class LineDetect(BaseDetectionNet):
             detect_point=detect_point,
             detect_line=detect_line,
             detect_arc=detect_arc,
-            detect_circle=detect_circle,
+            detect_circle=detect_ins,
         )
 
         if image_mean is None:
@@ -191,15 +192,16 @@ class LineDetect(BaseDetectionNet):
             # arc_predictor=ArcPredictor(in_channels=256,out_channels=1)
             arc_predictor=FPNDecoder(Bottleneck)
 
-        if detect_circle and circle_head is None:
+        if detect_ins and ins_head is None:
             layers = tuple(num_points for _ in range(8))
-            circle_head = CircleHeads(8, layers)
+            ins_head = FPNDecoder(Bottleneck)
 
-        if detect_circle and circle_predictor is None:
-            layers = tuple(num_points for _ in range(8))
+        if detect_ins and ins_predictor is None:
+            # layers = tuple(num_points for _ in range(8))
             # arc_predictor=ArcPredictor(in_channels=256,out_channels=1)
             # circle_predictor = CirclePredictor(in_channels=256,out_channels=4)
-            circle_predictor=FPNDecoder(Bottleneck)
+            ins_predictor=ArcEquationPredictor()
+
 
 
 
@@ -215,9 +217,9 @@ class LineDetect(BaseDetectionNet):
         self.roi_heads.arc_head = arc_head
         self.roi_heads.arc_predictor = arc_predictor
 
-        self.roi_heads.circle_roi_pool = circle_roi_pool
-        self.roi_heads.circle_head = circle_head
-        self.roi_heads.circle_predictor = circle_predictor
+        self.roi_heads.ins_roi_pool = circle_roi_pool
+        self.roi_heads.ins_head = ins_head
+        self.roi_heads.ins_predictor = ins_predictor
 
     def start_train(self, cfg):
         # cfg = read_yaml(cfg)
@@ -363,12 +365,12 @@ def linedetect_newresnet18fpn(
     anchor_generator =  AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
 
     model = LineDetect(backbone,
-                       num_classes,min_size=size,max_size=size, num_points=num_points,
+                       num_classes, min_size=size, max_size=size, num_points=num_points,
                        rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
                        detect_point=False,
                        detect_line=False,
                        detect_arc=False,
-                       detect_circle=True,
+                       detect_ins=True,
 
                        **kwargs)
 
@@ -406,11 +408,11 @@ def linedetect_newresnet50fpn(
 
     anchor_generator =  AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
 
-    model = LineDetect(backbone, num_classes,min_size=size,max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
+    model = LineDetect(backbone, num_classes, min_size=size, max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
                        detect_point=False,
                        detect_line=False,
                        detect_arc=False,
-                       detect_circle=True,
+                       detect_ins=True,
                        **kwargs)
 
 
@@ -450,11 +452,11 @@ def linedetect_newresnet101fpn(
 
     anchor_generator =  AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
 
-    model = LineDetect(backbone, num_classes,min_size=size,max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
+    model = LineDetect(backbone, num_classes, min_size=size, max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
                        detect_point=False,
                        detect_line=False,
                        detect_arc=False,
-                       detect_circle=True,
+                       detect_ins=True,
                        **kwargs)
 
     return model
@@ -492,12 +494,12 @@ def linedetect_newresnet152fpn(
 
     anchor_generator =  AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)
 
-    model = LineDetect(backbone, num_classes,min_size=size,max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
+    model = LineDetect(backbone, num_classes, min_size=size, max_size=size, num_points=num_points, rpn_anchor_generator=anchor_generator, box_roi_pool=roi_pooler,
 
                        detect_point=False,
                        detect_line=False,
                        detect_arc=False,
-                       detect_circle=True,
+                       detect_ins=True,
                        **kwargs)
 
     return model
@@ -539,7 +541,7 @@ def linedetect_efficientnet(
         detect_line=False,
         detect_point=False,
         detect_arc=False,
-        detect_circle=True,
+        detect_ins=True,
     )
     return model
 
@@ -592,7 +594,7 @@ def linedetect_maxvitfpn(
         detect_line=False,
         detect_point=False,
         detect_arc=False,
-        detect_circle=True,
+        detect_ins=True,
     )
     return model
 
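All of the factories above repeat the same anchor setup before constructing LineDetect. For reference, a self-contained sketch of that pattern with torchvision's AnchorGenerator — the concrete sizes and ratios here are illustrative, since the factories' own values fall outside the hunks:

    from torchvision.models.detection.anchor_utils import AnchorGenerator

    # one size tuple per FPN level; aspect ratios are repeated across levels
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes, aspect_ratios=aspect_ratios)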

+ 18 - 18
models/line_detect/loi_heads.py

@@ -61,18 +61,18 @@ def maskrcnn_inference(x, labels):
     # type: (Tensor, List[Tensor]) -> List[Tensor]
     """
     From the results of the CNN, post process the masks
-    by taking the mask corresponding to the class with max
+    by taking the ins corresponding to the class with max
     probability (which are of fixed size and directly output
-    by the CNN) and return the masks in the mask field of the BoxList.
+    by the CNN) and return the masks in the ins field of the BoxList.
 
     Args:
-        x (Tensor): the mask logits
+        x (Tensor): the ins logits
         labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image
 
     Returns:
         results (list[BoxList]): one BoxList for each image, containing
-            the extra field mask
+            the extra field ins
     """
     mask_prob = x.sigmoid()
 
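The selection step the renamed docstring describes is unchanged: after the sigmoid, each box keeps only the channel of its predicted class. A condensed, runnable sketch of that indexing, mirroring torchvision's maskrcnn_inference:

    import torch

    def select_class_channels(mask_prob, labels):
        # mask_prob: (N, num_classes, H, W); labels: list of per-image label tensors
        boxes_per_image = [lbl.shape[0] for lbl in labels]
        labels_cat = torch.cat(labels)
        index = torch.arange(mask_prob.shape[0], device=labels_cat.device)
        per_box = mask_prob[index, labels_cat][:, None]  # (N, 1, H, W)
        return per_box.split(boxes_per_image, dim=0)     # back to per-image lists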
@@ -427,7 +427,7 @@ def paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
@@ -453,7 +453,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
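Both paste helpers end the same way after the interpolation shown above: the resized prediction is copied into a zero canvas at the (clamped) box location. A sketch of that tail, following torchvision's paste_mask_in_image and assuming integer box coordinates:

    import torch

    def paste_into_canvas(mask, box, im_h, im_w):
        # mask: (h, w) tensor already resized to the box size
        # box: (x0, y0, x1, y1) as Python ints in image coordinates
        im_mask = torch.zeros((im_h, im_w), dtype=mask.dtype, device=mask.device)
        x_0, x_1 = max(box[0], 0), min(box[2] + 1, im_w)
        y_0, y_1 = max(box[1], 0), min(box[3] + 1, im_h)
        im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - box[1]):(y_1 - box[1]),
                                         (x_0 - box[0]):(x_1 - box[0])]
        return im_mask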
@@ -537,9 +537,9 @@ class RoIHeads(nn.Module):
             point_head=None,
             point_predictor=None,
 
-            circle_head=None,
-            circle_predictor=None,
-            circle_roi_pool=None,
+            ins_head=None,
+            ins_predictor=None,
+            ins_roi_pool=None,
 
             # arc parameters
             arc_roi_pool=None,
@@ -591,9 +591,9 @@ class RoIHeads(nn.Module):
         self.arc_head = arc_head
         self.arc_predictor = arc_predictor
 
-        self.circle_roi_pool = circle_roi_pool
-        self.circle_head = circle_head
-        self.circle_predictor = circle_predictor
+        self.ins_roi_pool = ins_roi_pool
+        self.ins_head = ins_head
+        self.ins_predictor = ins_predictor
 
 
 
@@ -661,10 +661,10 @@ class RoIHeads(nn.Module):
         #     return False
         return True
 
-    def has_circle(self):
+    def has_ins(self):
         # if self.line_roi_pool is None:
         #     return False
-        if self.circle_head is None:
+        if self.ins_head is None:
             return False
         # if self.line_predictor is None:
         #     return False
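Note that has_ins is a method, so a bare self.has_ins is always truthy; the guard in the forward pass (next hunk) only works if it is invoked as self.has_ins(). A tiny illustration of the pitfall:

    class C:
        def has_ins(self):
            return False

    c = C()
    assert bool(c.has_ins)    # bound method object: always truthy -- the pitfall
    assert not c.has_ins()    # calling it yields the real answer
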
@@ -1321,7 +1321,7 @@ class RoIHeads(nn.Module):
                 losses.update(loss_arc)
                 print(f'losses:{losses}')
 
-        if self.has_circle and self.detect_circle:
+        if self.has_ins() and self.detect_circle:
            print(f'roi_heads forward has_ins()')
             # print(f'labels:{labels}')
             circle_proposals = [p["boxes"] for p in result]
@@ -1371,7 +1371,7 @@ class RoIHeads(nn.Module):
 
 
                print(f'features from backbone:{features["0"].shape}')
-                feature_logits = self.circle_forward1(features, image_shapes, circle_proposals)
+                feature_logits = self.ins_forward1(features, image_shapes, circle_proposals)
 
                 loss_circle = None
                loss_circle_extra = None
@@ -1666,7 +1666,7 @@ class RoIHeads(nn.Module):
             print(f'roi_features from align:{roi_features.shape}')
         return roi_features
 
-    def circle_forward1(self, features, image_shapes, proposals):
+    def ins_forward1(self, features, image_shapes, proposals):
         print(f'circle_proposals:{len(proposals)}')
         # cs_features= features['0']
        # print(f'features-0:{features["0"].shape}')
@@ -1682,7 +1682,7 @@ class RoIHeads(nn.Module):
         # point_proposals_tensor = torch.cat(proposals)
         # print(f'point_proposals_tensor:{point_proposals_tensor.shape}')
 
-        feature_logits = self.circle_predictor(cs_features)
+        feature_logits = self.ins_head(cs_features)
        print(f'feature_logits from ins_head:{feature_logits.shape}')
 
         roi_features = features_align(feature_logits, proposals, image_shapes)

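Stripped of the debug prints, ins_forward1 reduces to the flow below — a condensed sketch under the names used in the hunk, with the final predictor call assumed from the truncated context:

    def ins_forward_sketch(self, features, image_shapes, proposals):
        cs_features = features["0"]                    # finest FPN level
        feature_logits = self.ins_head(cs_features)    # dense logits over the map
        roi_features = features_align(feature_logits, proposals, image_shapes)
        return self.ins_predictor(roi_features)        # per-ROI equation parameters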
+ 6 - 6
models/line_net/roi_heads.py

@@ -326,18 +326,18 @@ def maskrcnn_inference(x, labels):
     # type: (Tensor, List[Tensor]) -> List[Tensor]
     """
     From the results of the CNN, post process the masks
-    by taking the mask corresponding to the class with max
+    by taking the ins corresponding to the class with max
     probability (which are of fixed size and directly output
-    by the CNN) and return the masks in the mask field of the BoxList.
+    by the CNN) and return the masks in the ins field of the BoxList.
 
     Args:
-        x (Tensor): the mask logits
+        x (Tensor): the ins logits
         labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image
 
     Returns:
         results (list[BoxList]): one BoxList for each image, containing
-            the extra field mask
+            the extra field ins
     """
     mask_prob = x.sigmoid()
 
@@ -684,7 +684,7 @@ def paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
@@ -710,7 +710,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 

+ 1 - 1
models/wirenet/_utils.py

@@ -58,7 +58,7 @@ class BalancedPositiveNegativeSampler:
             pos_idx_per_image = positive[perm1]
             neg_idx_per_image = negative[perm2]
 
-            # create binary mask from indices
+            # create binary ins from indices
             pos_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
             neg_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
 
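Rename aside, these tensors really are binary masks over proposal indices. A self-contained illustration of the scatter step that follows the two zeros_like lines above (positive/negative selection simplified):

    import torch

    matched = torch.tensor([2, -1, 0, 1, -1, 3])  # per-proposal match labels
    pos_idx = torch.where(matched >= 1)[0]        # sampled positives (simplified)
    neg_idx = torch.where(matched == 0)[0]        # sampled negatives (simplified)
    pos_mask = torch.zeros_like(matched, dtype=torch.uint8)
    neg_mask = torch.zeros_like(matched, dtype=torch.uint8)
    pos_mask[pos_idx] = 1                         # binary mask from indices
    neg_mask[neg_idx] = 1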

+ 9 - 9
models/wirenet/head.py

@@ -246,18 +246,18 @@ def maskrcnn_inference(x, labels):
     # type: (Tensor, List[Tensor]) -> List[Tensor]
     """
     From the results of the CNN, post process the masks
-    by taking the mask corresponding to the class with max
+    by taking the ins corresponding to the class with max
     probability (which are of fixed size and directly output
-    by the CNN) and return the masks in the mask field of the BoxList.
+    by the CNN) and return the masks in the ins field of the BoxList.
 
     Args:
-        x (Tensor): the mask logits
+        x (Tensor): the ins logits
         labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image
 
     Returns:
         results (list[BoxList]): one BoxList for each image, containing
-            the extra field mask
+            the extra field ins
     """
     mask_prob = x.sigmoid()
 
@@ -301,15 +301,15 @@ def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs
 
     discretization_size = mask_logits.shape[-1]
     # print(f'mask_logits:{mask_logits},gt_masks:{gt_masks},,gt_labels:{gt_labels}]')
-    # print(f'mask discretization_size:{discretization_size}')
+    # print(f'ins discretization_size:{discretization_size}')
     labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
-    # print(f'mask labels:{labels}')
+    # print(f'ins labels:{labels}')
     mask_targets = [
         project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
     ]
 
     labels = torch.cat(labels, dim=0)
-    # print(f'mask labels1:{labels}')
+    # print(f'ins labels1:{labels}')
     mask_targets = torch.cat(mask_targets, dim=0)
 
     # torch.mean (in binary_cross_entropy_with_logits) doesn't
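The hunk stops just before the loss itself. For context, the tail that follows in torchvision's maskrcnn_loss selects each positive's class channel and applies BCE-with-logits; the empty-target guard keeps the graph alive when no positives were sampled:

    import torch
    import torch.nn.functional as F

    def maskrcnn_loss_tail(mask_logits, mask_targets, labels):
        if mask_targets.numel() == 0:
            return mask_logits.sum() * 0  # zero loss that still backpropagates
        idx = torch.arange(labels.shape[0], device=labels.device)
        return F.binary_cross_entropy_with_logits(mask_logits[idx, labels], mask_targets)
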
@@ -612,7 +612,7 @@ def paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
@@ -638,7 +638,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 

+ 9 - 9
models/wirenet/roi_head.py

@@ -54,18 +54,18 @@ def maskrcnn_inference(x, labels):
     # type: (Tensor, List[Tensor]) -> List[Tensor]
     """
     From the results of the CNN, post process the masks
-    by taking the mask corresponding to the class with max
+    by taking the ins corresponding to the class with max
     probability (which are of fixed size and directly output
-    by the CNN) and return the masks in the mask field of the BoxList.
+    by the CNN) and return the masks in the ins field of the BoxList.
 
     Args:
-        x (Tensor): the mask logits
+        x (Tensor): the ins logits
         labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image
 
     Returns:
         results (list[BoxList]): one BoxList for each image, containing
-            the extra field mask
+            the extra field ins
     """
     mask_prob = x.sigmoid()
 
@@ -109,15 +109,15 @@ def maskrcnn_loss(mask_logits, proposals, gt_masks, gt_labels, mask_matched_idxs
 
     discretization_size = mask_logits.shape[-1]
     # print(f'mask_logits:{mask_logits},gt_masks:{gt_masks},,gt_labels:{gt_labels}]')
-    # print(f'mask discretization_size:{discretization_size}')
+    # print(f'ins discretization_size:{discretization_size}')
     labels = [gt_label[idxs] for gt_label, idxs in zip(gt_labels, mask_matched_idxs)]
-    # print(f'mask labels:{labels}')
+    # print(f'ins labels:{labels}')
     mask_targets = [
         project_masks_on_boxes(m, p, i, discretization_size) for m, p, i in zip(gt_masks, proposals, mask_matched_idxs)
     ]
 
     labels = torch.cat(labels, dim=0)
-    # print(f'mask labels1:{labels}')
+    # print(f'ins labels1:{labels}')
     mask_targets = torch.cat(mask_targets, dim=0)
 
     # torch.mean (in binary_cross_entropy_with_logits) doesn't
@@ -420,7 +420,7 @@ def paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
@@ -446,7 +446,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 

+ 1 - 1
models/wirenet2/WirePredictor.py

@@ -23,7 +23,7 @@ class WirePredictor(nn.Module):
         )
         self.decoder1 = self._block(features * 2, features, name="dec1")
 
-        # Output for line segment mask
+        # Output for line segment ins
         self.conv_mask = nn.Conv2d(
             in_channels=features, out_channels=out_channels, kernel_size=1
         )
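The renamed comment still describes a per-pixel line-segment mask head: a 1x1 convolution mapping decoder features to output channels, so spatial size is preserved. A quick shape check (the channel count of 32 is an assumption for the demo):

    import torch
    import torch.nn as nn

    conv_mask = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1)
    logits = conv_mask(torch.rand(1, 32, 128, 128))  # -> (1, 1, 128, 128)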

+ 1 - 1
models/wirenet2/_utils.py

@@ -58,7 +58,7 @@ class BalancedPositiveNegativeSampler:
             pos_idx_per_image = positive[perm1]
             neg_idx_per_image = negative[perm2]
 
-            # create binary mask from indices
+            # create binary ins from indices
             pos_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
             neg_idx_per_image_mask = torch.zeros_like(matched_idxs_per_image, dtype=torch.uint8)
 

+ 6 - 6
models/wirenet2/roi_heads.py

@@ -53,18 +53,18 @@ def maskrcnn_inference(x, labels):
     # type: (Tensor, List[Tensor]) -> List[Tensor]
     """
     From the results of the CNN, post process the masks
-    by taking the mask corresponding to the class with max
+    by taking the ins corresponding to the class with max
     probability (which are of fixed size and directly output
-    by the CNN) and return the masks in the mask field of the BoxList.
+    by the CNN) and return the masks in the ins field of the BoxList.
 
     Args:
-        x (Tensor): the mask logits
+        x (Tensor): the ins logits
         labels (list[BoxList]): bounding boxes that are used as
            reference, one for each image
 
     Returns:
         results (list[BoxList]): one BoxList for each image, containing
-            the extra field mask
+            the extra field ins
     """
     mask_prob = x.sigmoid()
 
@@ -414,7 +414,7 @@ def paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, -1, -1))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(h, w), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 
@@ -440,7 +440,7 @@ def _onnx_paste_mask_in_image(mask, box, im_h, im_w):
     # Set shape to [batchxCxHxW]
     mask = mask.expand((1, 1, mask.size(0), mask.size(1)))
 
-    # Resize mask
+    # Resize ins
     mask = F.interpolate(mask, size=(int(h), int(w)), mode="bilinear", align_corners=False)
     mask = mask[0][0]
 

+ 1 - 1
tools/train.py

@@ -43,7 +43,7 @@ def copypaste_collate_fn(batch):
 def get_dataset(is_train, args):
     image_set = "train" if is_train else "val"
     num_classes, mode = {"coco": (91, "instances"), "coco_kp": (2, "person_keypoints")}[args.dataset]
-    with_masks = "mask" in args.model
+    with_masks = "ins" in args.model
     ds = get_coco(
         root=args.data_path,
         image_set=image_set,

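One caveat with this rename: torchvision-style model names (e.g. maskrcnn_resnet50_fpn) contain "mask", not "ins", so the new check would silently drop mask targets for them. If both naming schemes must keep working, a hedged alternative:

    # accept either spelling when deciding whether to load mask targets
    with_masks = any(tag in args.model for tag in ("mask", "ins"))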
+ 1 - 1
tools/transforms.py

@@ -522,7 +522,7 @@ def _copy_paste(
         out_target["area"] = out_target["masks"].sum((-1, -2)).to(torch.float32)
 
     if "iscrowd" in target and "iscrowd" in paste_target:
-        # target['iscrowd'] size can be differ from mask size (non_all_zero_masks)
+        # target['iscrowd'] size can differ from ins size (non_all_zero_masks),
        # e.g. if a previous transform geometrically modified masks/boxes/labels but
        # did not update "iscrowd"
         if len(target["iscrowd"]) == len(non_all_zero_masks):
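The hunk is cut off here; in torchvision's SimpleCopyPaste the branch continues by concatenating the filtered source flags with the pasted ones, roughly:

    # inside the len-match branch shown above (per torchvision's _copy_paste)
    out_target["iscrowd"] = torch.cat(
        [target["iscrowd"][non_all_zero_masks], paste_target["iscrowd"]]
    )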