Your Name 2 тижнів тому
батько
коміт
aafc368df2

+ 21 - 0
libs/vision_libs/models/detection/transform.py

@@ -227,6 +227,16 @@ class GeneralizedRCNNTransform(nn.Module):
             arc_mask = target["circles"]
             arc_mask = resize_keypoints(arc_mask, (h, w), image.shape[-2:])
             target["circles"] = arc_mask
+
+
+        if "mask_ends" in target:
+            arc_mask = target["mask_ends"]
+            arc_mask = resize_keypoints(arc_mask, (h, w), image.shape[-2:])
+            target["mask_ends"] = arc_mask
+        if "mask_params" in target:
+            arc_mask = target["mask_params"]
+            arc_mask = resize_keypoints(arc_mask, (h, w), image.shape[-2:])
+            target["mask_params"] = arc_mask
         return image, target
 
     # _onnx_batch_images() is an implementation of
@@ -319,6 +329,17 @@ class GeneralizedRCNNTransform(nn.Module):
                 masks = pred["circle_masks"]
                 masks = paste_masks_in_image(masks, boxes, o_im_s)
                 result[i]["circle_masks"] = masks
+
+            if "mask_ends" in pred:
+                arc_mask = pred["mask_ends"]
+                arc_mask = resize_keypoints(arc_mask,im_s, o_im_s)
+                result[i]["mask_ends"] = arc_mask
+            if "mask_params" in pred:
+                arc_mask = pred["mask_params"]
+                arc_mask = resize_keypoints(arc_mask, im_s, o_im_s)
+                result[i]["mask_params"] = arc_mask
+
+
         return result
 
     def __repr__(self) -> str:

+ 85 - 2
models/line_detect/heads/arc/arc_heads.py

@@ -84,6 +84,7 @@ class ArcEquationHead(nn.Module):
         feature_logits: [N, 1, H, W]
         """
         N, _, H, W = feature_logits.shape
+        print(f'N:{N}, H:{H}, W:{W}')
 
         # --------------------------------------------
         # Global average pooling
@@ -99,14 +100,19 @@ class ArcEquationHead(nn.Module):
         # --------------------------------------------
         # Parameter constraints
         # --------------------------------------------
+        # H=1500
+        # W = 2000
 
         # Ellipse center
         arc_params[..., 0] = torch.sigmoid(arc_params[..., 0]) * W   # cx in image width range
         arc_params[..., 1] = torch.sigmoid(arc_params[..., 1]) * H   # cy in image height range
 
+
         # Axes lengths must be positive
-        arc_params[..., 2] = F.relu(arc_params[..., 2]) + 1e-6       # a > 0
-        arc_params[..., 3] = F.relu(arc_params[..., 3]) + 1e-6       # b > 0
+        arc_params[..., 2] = torch.sigmoid(arc_params[..., 2]) * W  # a in (0, W)
+        arc_params[..., 3] = torch.sigmoid(arc_params[..., 3]) * W  # b in (0, W); NOTE(review): scaled by W, not H - confirm intended
+        # arc_params[..., 2] = F.relu(arc_params[..., 2]) + 1e-6       # a > 0
+        # arc_params[..., 3] = F.relu(arc_params[..., 3]) + 1e-6       # b > 0
 
         # Angle between 0~2π
         arc_params[..., 4] = torch.sigmoid(arc_params[..., 4]) * (2 * 3.1415926535)
@@ -120,4 +126,81 @@ class ArcEquationHead(nn.Module):
         arc_params[..., 7] = torch.sigmoid(arc_params[..., 7]) * W  # x auxiliary
         arc_params[..., 8] = torch.sigmoid(arc_params[..., 8]) * H  # y auxiliary
 
+        print(f'arc_params in head:{arc_params}')
+
         return arc_params
+
+
+
+# class ArcEquationHead(nn.Module):
+#     """
+#     Input:
+#         feature_logits : [N, 1, H, W]  # N: number of arcs; H/W: feature-map size (corresponds to spatial positions in the original image)
+#     Output:
+#         arc_params : [N, 9]  # [cx, cy, a, b, theta, x1, y1, x2, y2]
+#     """
+#
+#     def __init__(self, num_outputs=9, hidden=1024, feat_size=(672, 672)):
+#         super().__init__()
+#         self.feat_H, self.feat_W = feat_size  # fixed feature-map size (must match feature_logits)
+#
+#         self.flatten = nn.Flatten()
+#         self.input_dim = self.feat_H * self.feat_W  # input dimension: H*W (not 1)
+#
+#         self.mlp = nn.Sequential(
+#             nn.Linear(self.input_dim, hidden),
+#             nn.ReLU(inplace=True),
+#             nn.Dropout(0.2),  # guard against overfitting
+#             nn.Linear(hidden, hidden // 2),
+#             nn.ReLU(inplace=True),
+#             nn.Dropout(0.1),
+#             nn.Linear(hidden // 2, num_outputs)
+#         )
+#
+#         self._init_weights()
+#
+#     def _init_weights(self):
+#         for m in self.mlp.modules():
+#             if isinstance(m, nn.Linear):
+#                 nn.init.xavier_uniform_(m.weight)  # uniform init so outputs do not cluster
+#                 if m.bias is not None:
+#                     nn.init.zeros_(m.bias)
+#
+#     def forward(self, feature_logits):
+#         N, C, H, W = feature_logits.shape
+#         assert H == self.feat_H and W == self.feat_W, "特征图尺寸需与初始化时的feat_size一致"
+#
+#         # 1. Flatten spatial features: [N,1,H,W] -> [N, H*W] (keeps per-pixel spatial information)
+#         x = self.flatten(feature_logits)  # [N, H*W]
+#
+#         # 2. MLP predicts the raw parameters
+#         arc_params = self.mlp(x)  # [N,9]
+#
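+#         # 3. Improved parameter constraints (avoid abnormal center / axis lengths)
+#         # Ellipse center: mapped to the feature-map size (if the feature map is a downsampled original image, multiply by the scale factor)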
+#         arc_params[..., 0] = torch.sigmoid(arc_params[..., 0]) * W  # cx
+#         arc_params[..., 1] = torch.sigmoid(arc_params[..., 1]) * H  # cy
+#
+#         arc_params[..., 2] = torch.sigmoid(arc_params[..., 2]) * W  # a in (0, W)
+#         arc_params[..., 3] = torch.sigmoid(arc_params[..., 3]) * H  # b in (0, H)
+#         # arc_params[..., 2] = F.relu(arc_params[..., 2]) + 1e-6       # a > 0
+#         # arc_params[..., 3] = F.relu(arc_params[..., 3]) + 1e-6       # b > 0
+#
+#         # Angle between 0~2π
+#         arc_params[..., 4] = torch.sigmoid(arc_params[..., 4]) * (2 * 3.1415926535)
+#
+#         # ------------------------------------------------
+#         # Last two values are auxiliary points
+#         # Now mapped to the same spatial range as image
+#         # ------------------------------------------------
+#         arc_params[..., 5] = torch.sigmoid(arc_params[..., 5]) * W   # x auxiliary
+#         arc_params[..., 6] = torch.sigmoid(arc_params[..., 6]) * H   # y auxiliary
+#         arc_params[..., 7] = torch.sigmoid(arc_params[..., 7]) * W  # x auxiliary
+#         arc_params[..., 8] = torch.sigmoid(arc_params[..., 8]) * H  # y auxiliary
+#         print(f'arc_params in head:{arc_params}')
+#         return arc_params
+
+
+
+
+

+ 19 - 2
models/line_detect/loi_heads.py

@@ -1838,8 +1838,25 @@ def compute_arc_equation_loss(arc_equation, proposals, gt_mask_ends, gt_mask_par
                                                                         gt_labels_all, arc_pos_matched_idxs):
         print(f'line_proposals_per_image:{proposals_per_image.shape}')
         # gt_angle = torch.tensor(gt_angle)
-        gt_ends = torch.tensor(gt_ends)
-        gt_params = torch.tensor(gt_params)
+
+
+        # gt_ends = torch.tensor(gt_ends)
+        # gt_params = torch.tensor(gt_params)
+        if isinstance(gt_ends, np.ndarray):
+            gt_ends = torch.from_numpy(gt_ends).float()  # numpy -> tensor (float32)
+        else:
+            gt_ends = gt_ends.clone().detach().float()  # tensor -> copy + detach from autograd + cast to float32
+
+        if isinstance(gt_params, np.ndarray):
+            gt_params = torch.from_numpy(gt_params).float()
+        else:
+            gt_params = gt_params.clone().detach().float()
+
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        gt_ends = gt_ends.to(device)
+        gt_params = gt_params.to(device)
+
+
         if gt_ends.shape[0] > 0:
             # positions = (gt_label == 3).nonzero()[0].item()
 

+ 53 - 42
models/line_detect/trainer.py

@@ -18,7 +18,6 @@ from models.config.config_tool import read_yaml
 from models.line_detect.line_dataset import LineDataset
 import torch.nn.functional as F
 
-
 from tools import utils
 
 import matplotlib as mpl
@@ -29,6 +28,8 @@ cmap = plt.get_cmap("jet")
 norm = mpl.colors.Normalize(vmin=0.4, vmax=1.0)
 sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
 sm.set_array([])
+
+
 def _loss(losses):
     total_loss = 0
     for i in losses.keys():
@@ -41,6 +42,8 @@ def _loss(losses):
         loss = loss_labels[0][name].mean()
         total_loss += loss
     return total_loss
+
+
 def c(x):
     return sm.to_rgba(x)
 
@@ -48,7 +51,6 @@ def c(x):
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
 
-
 def draw_ellipses_on_image(image, masks_pred, threshold=0.5, color=(0, 255, 0), thickness=2):
     """
     在单张原始图像上绘制从 masks 拟合出的椭圆。
@@ -153,9 +155,12 @@ def fit_circle(points):
     r = np.sqrt(cx ** 2 + cy ** 2 - F)
 
     return (cx, cy), r
+
+
 from PIL import ImageDraw, Image
 import io
 
+
 def draw_el(all, background_img):
     """
     all = [x_center, y_center, a, b, theta, x1, y1, x2, y2]
@@ -169,6 +174,18 @@ def draw_el(all, background_img):
 
     # Unpack parameters
     cx, cy, a, b, theta_deg, x1, y1, x2, y2 = all
+
+    cx = cx / 672 * 2000
+    cy = cy / 672 * 2000
+    # a = a / 672 * 2000
+    # b = b / 672 * 2000
+    x1 = x1 / 672 * 2000
+    y1 = y1 / 672 * 2000
+    x2 = x2 / 672 * 2000
+    y2 = y2 / 672 * 2000
+
+
+
     theta = np.radians(theta_deg)
 
     # ====== Draw ellipse ======
@@ -200,6 +217,7 @@ def draw_el(all, background_img):
 
     return img_tensor
 
+
 # from PIL import ImageDraw, Image
 # import io
 # # 绘制椭圆
@@ -237,15 +255,15 @@ def draw_el(all, background_img):
 #     plt.plot(P1[0], P1[1], 'ro', markersize=10)
 #     plt.plot(P2[0], P2[1], 'go', markersize=10)
 
-    # 转换为TensorBoard所需的张量格式 [C, H, W]
-    # buf = io.BytesIO()
-    # plt.savefig(buf, format='png', bbox_inches='tight')
-    # buf.seek(0)
-    # result_img = Image.open(buf).convert('RGB')
-    # img_tensor = torch.from_numpy(np.array(result_img)).permute(2, 0, 1)
-    # plt.close()
-    #
-    # return img_tensor
+# 转换为TensorBoard所需的张量格式 [C, H, W]
+# buf = io.BytesIO()
+# plt.savefig(buf, format='png', bbox_inches='tight')
+# buf.seek(0)
+# result_img = Image.open(buf).convert('RGB')
+# img_tensor = torch.from_numpy(np.array(result_img)).permute(2, 0, 1)
+# plt.close()
+#
+# return img_tensor
 # 由低到高蓝黄红
 def draw_lines_with_scores(tensor_image, lines, scores, width=3, cmap='viridis'):
     """
@@ -304,7 +322,7 @@ class Trainer(BaseTrainer):
             self.best_train_model_path = os.path.join(self.wts_path, 'best_train.pth')
             self.best_val_model_path = os.path.join(self.wts_path, 'best_val.pth')
             self.max_epoch = kwargs['train_params']['max_epoch']
-            self.augmentation= kwargs['train_params']["augmentation"]
+            self.augmentation = kwargs['train_params']["augmentation"]
 
     def move_to_device(self, data, device):
         if isinstance(data, (list, tuple)):
@@ -385,11 +403,7 @@ class Trainer(BaseTrainer):
             print(f"No saved model found at {save_path}")
         return model, optimizer
 
-
-
-
-
-    def writer_predict_result(self, img, result, epoch,):
+    def writer_predict_result(self, img, result, epoch, ):
         img = img.cpu().detach()
         im = img.permute(1, 2, 0)  # [512, 512, 3]
         self.writer.add_image("z-ori", im, epoch, dataformats="HWC")
@@ -402,21 +416,20 @@ class Trainer(BaseTrainer):
 
         self.writer.add_image("z-obj", boxed_image.permute(1, 2, 0), epoch, dataformats="HWC")
 
-
-        if  'points' in result:
+        if 'points' in result:
             keypoint_img = draw_keypoints(boxed_image, result['points'], colors='red', width=3)
 
             self.writer.add_image("z-output", keypoint_img, epoch)
         # print("lines shape:", result['lines'].shape)
 
-
         if 'lines' in result:
             # 用自己写的函数画线段
             # line_image = draw_lines(boxed_image, result['lines'], color='red', width=3)
             print(f"shape of linescore:{result['lines_scores'].shape}")
             scores = result['lines_scores'].mean(dim=1)  # shape: [31]
 
-            line_image = draw_lines_with_scores((img * 255).to(torch.uint8),  result['lines'],scores, width=3, cmap='jet')
+            line_image = draw_lines_with_scores((img * 255).to(torch.uint8), result['lines'], scores, width=3,
+                                                cmap='jet')
 
             self.writer.add_image("z-output_line", line_image.permute(1, 2, 0), epoch, dataformats="HWC")
 
@@ -436,8 +449,8 @@ class Trainer(BaseTrainer):
         if 'ins_masks' in result:
             # points=result['circles']
             # points=points.squeeze(1)
-            ppp=result['ins_masks']
-            bbb=result['boxes']
+            ppp = result['ins_masks']
+            bbb = result['boxes']
             print(f'boxes shape:{bbb.shape}')
             print(f'ppp:{ppp.shape}')
             ins_masks = result['ins_masks']
@@ -454,25 +467,22 @@ class Trainer(BaseTrainer):
             # keypoint_img = draw_keypoints((img * 255).to(torch.uint8), points, colors='red', width=3)
             self.writer.add_image('z-ins-masks', sum_mask.squeeze(0), global_step=epoch)
 
-
             result_imgs = draw_ellipses_on_image(img, ins_masks, threshold=0.5)
-            self.writer.add_image('z-out-ellipses', result_imgs, dataformats='HWC', global_step= epoch)
+            self.writer.add_image('z-out-ellipses', result_imgs, dataformats='HWC', global_step=epoch)
 
-            features=self.apply_gaussian_blur_to_tensor(features,sigma=3)
+            features = self.apply_gaussian_blur_to_tensor(features, sigma=3)
             self.writer.add_image('z-feature', features, global_step=epoch)
 
             # cv2.imshow('arc', img_rgb)
             # cv2.waitKey(1000000)
 
-
-
-    def normalize_tensor(self,tensor):
+    def normalize_tensor(self, tensor):
         """Normalize tensor to [0, 1]"""
         min_val = tensor.min()
         max_val = tensor.max()
         return (tensor - min_val) / (max_val - min_val)
 
-    def apply_gaussian_blur_to_tensor(self,feature_map, sigma=3):
+    def apply_gaussian_blur_to_tensor(self, feature_map, sigma=3):
         """
         Apply Gaussian blur to a feature map and convert it into an RGB heatmap.
 
@@ -500,6 +510,7 @@ class Trainer(BaseTrainer):
         colored_tensor = torch.from_numpy(colored_rgb).permute(2, 0, 1)  # (3, H, W)
 
         return colored_tensor.float()
+
     def writer_loss(self, losses, epoch, phase='train'):
         try:
             for key, value in losses.items():
@@ -525,8 +536,10 @@ class Trainer(BaseTrainer):
 
         self.init_params(**kwargs)
 
-        dataset_train = LineDataset(dataset_path=self.dataset_path,augmentation=self.augmentation, data_type=self.data_type, dataset_type='train')
-        dataset_val = LineDataset(dataset_path=self.dataset_path,augmentation=self.augmentation, data_type=self.data_type, dataset_type='val')
+        dataset_train = LineDataset(dataset_path=self.dataset_path, augmentation=self.augmentation,
+                                    data_type=self.data_type, dataset_type='train')
+        dataset_val = LineDataset(dataset_path=self.dataset_path, augmentation=self.augmentation,
+                                  data_type=self.data_type, dataset_type='val')
 
         train_sampler = torch.utils.data.RandomSampler(dataset_train)
         val_sampler = torch.utils.data.RandomSampler(dataset_val)
@@ -536,7 +549,7 @@ class Trainer(BaseTrainer):
         val_collate_fn = utils.collate_fn
 
         data_loader_train = torch.utils.data.DataLoader(
-            dataset_train, batch_sampler=train_batch_sampler,  num_workers=self.num_workers, collate_fn=train_collate_fn
+            dataset_train, batch_sampler=train_batch_sampler, num_workers=self.num_workers, collate_fn=train_collate_fn
         )
         data_loader_val = torch.utils.data.DataLoader(
             dataset_val, batch_sampler=val_batch_sampler, num_workers=self.num_workers, collate_fn=val_collate_fn
@@ -568,12 +581,11 @@ class Trainer(BaseTrainer):
                 model, epoch_val_loss = self.one_epoch(model, data_loader_val, epoch, optimizer, phase='val')
                 scheduler.step(epoch_val_loss)
 
-            if epoch==0:
+            if epoch == 0:
                 best_train_loss = epoch_train_loss
                 best_val_loss = epoch_val_loss
 
-
-            self.save_last_model(model,self.last_model_path, epoch, optimizer)
+            self.save_last_model(model, self.last_model_path, epoch, optimizer)
             best_train_loss = self.save_best_model(model, self.best_train_model_path, epoch, epoch_train_loss,
                                                    best_train_loss,
                                                    optimizer)
@@ -592,8 +604,8 @@ class Trainer(BaseTrainer):
         for imgs, targets in data_loader:
             imgs = self.move_to_device(imgs, device)
             targets = self.move_to_device(targets, device)
-            if phase== 'val':
-                result,loss_dict = model(imgs, targets)
+            if phase == 'val':
+                result, loss_dict = model(imgs, targets)
                 losses = sum(loss_dict.values())
 
                 print(f'val losses:{losses}')
@@ -604,7 +616,7 @@ class Trainer(BaseTrainer):
                 print(f'train losses:{losses}')
 
             # loss = _loss(losses)
-            loss=losses
+            loss = losses
             total_loss += loss.item()
             if phase == 'train':
                 optimizer.zero_grad()
@@ -623,14 +635,13 @@ class Trainer(BaseTrainer):
                 from utils.data_process.show_prams import print_params
                 print_params(imgs[0], result[0], epoch)
                 self.writer_predict_result(img=imgs[0], result=result[0], epoch=epoch)
-                epoch_step+=1
+                epoch_step += 1
 
         avg_loss = total_loss / len(data_loader)
         print(f'{phase}/loss epoch{epoch}:{avg_loss:4f}')
         self.writer.add_scalar(f'loss/{phase}', avg_loss, epoch)
         return model, avg_loss
 
-
     def save_best_model(self, model, save_path, epoch, current_loss, best_loss, optimizer=None):
         os.makedirs(os.path.dirname(save_path), exist_ok=True)
 
@@ -669,4 +680,4 @@ class Trainer(BaseTrainer):
 
 
 if __name__ == '__main__':
-    print('')
+    print('')