Procházet zdrojové kódy

Add demo.py; Closes #4

Yichao Zhou před 6 roky
rodič
revize
5d2bd1bb8a
10 změnil soubory, kde provedl 274 přidání a 121 odebrání
  1. 1 1
      LICENSE
  2. 17 6
      README.md
  3. 138 0
      demo.py
  4. 1 1
      eval-APH.py
  5. 16 16
      lcnn/models/line_vectorizer.py
  6. 3 1
      lcnn/models/multitask_learner.py
  7. 77 0
      lcnn/postprocess.py
  8. 7 7
      lcnn/trainer.py
  9. 6 81
      post.py
  10. 8 8
      train.py

+ 1 - 1
LICENSE

@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2019 Yichao Zhou
+Copyright (c) 2019-2020 Yichao Zhou
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

+ 17 - 6
README.md

@@ -60,6 +60,7 @@ lcnn/                           # lcnn module so you can "import lcnn" in other
     trainer.py                  # trainer
     config.py                   # global variables for configuration
     utils.py                    # misc functions
+demo.py                         # script for detecting wireframes for an image
 eval-sAP.py                     # script for sAP evaluation
 eval-APH.py                     # script for APH evaluation
 eval-mAPJ.py                    # script for mAPJ evaluation
@@ -86,6 +87,22 @@ conda install -y pyyaml docopt matplotlib scikit-image opencv
 mkdir data logs post
 ```
 
+### Pre-trained Models
+
+You can download our reference pre-trained models from [Google
+Drive](https://drive.google.com/file/d/1NvZkEqWNUBAfuhFPNGiCItjy4iU0UOy2).  Those models were
+trained with `config/wireframe.yaml` for 312k iterations.  Use `demo.py`, `process.py`, and
+`eval-*.py` to evaluate the pre-trained models. **Do not try to unzip them!**
+
+### Detect Wireframes for Your Own Images
+To test LCNN on your own images, you need to download the pre-trained models and execute
+
+```Bash
+python ./demo.py -d 0 config/wireframe.yaml <path-to-pretrained-pth> <path-to-image>
+```
+Here, `-d 0` specifies the ID of the GPU used for inference; pass `-d ""` instead to force CPU inference.
+
+
 ### Downloading the Processed Dataset
 Make sure `curl` is installed on your system and execute
 ```bash
@@ -121,12 +138,6 @@ To train the neural network on GPU 0 (specified by `-d 0`) with the default para
 python ./train.py -d 0 --identifier baseline  config/wireframe.yaml
 ```
 
-### Pre-trained Models
-
-You can download our reference pre-trained models from [Google
-Drive](https://drive.google.com/file/d/1NvZkEqWNUBAfuhFPNGiCItjy4iU0UOy2).  Those models were trained
-with `config/wireframe.yaml` for 312k iterations.  Use `process.py` and `eval.py` to evaluate the pre-trained models. **Do not try to unzip them!**
-
 ### Post Processing
 
 To post process the outputs from neural network (only necessary if you are going to evaluate AP<sup>H</sup>), execute

+ 138 - 0
demo.py

@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+"""Process an image with the trained neural network
+Usage:
+    demo.py [options] <yaml-config> <checkpoint> <image>
+    demo.py (-h | --help )
+
+Arguments:
+   <yaml-config>                 Path to the yaml hyper-parameter file
+   <checkpoint>                  Path to the checkpoint
+   <image>                       Path to the directory containing processed images
+
+Options:
+   -h --help                     Show this screen.
+   -d --devices <devices>        Comma seperated GPU devices [default: 0]
+"""
+
+import os
+import os.path as osp
+import pprint
+import random
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import skimage.io
+import skimage.transform
+import torch
+import yaml
+from docopt import docopt
+
+import lcnn
+from lcnn.config import C, M
+from lcnn.models.line_vectorizer import LineVectorizer
+from lcnn.models.multitask_learner import MultitaskHead, MultitaskLearner
+from lcnn.postprocess import postprocess
+from lcnn.utils import recursive_to
+
+# Keyword arguments for plt.scatter when drawing line endpoints (cyan dots, zorder 5).
+PLTOPTS = {"color": "#33FFFF", "s": 15, "edgecolors": "none", "zorder": 5}
+# Color mapping for line scores: values are normalized over [0.9, 1.0] and
+# looked up in the "jet" colormap through the ScalarMappable `sm`.
+cmap = plt.get_cmap("jet")
+norm = mpl.colors.Normalize(vmin=0.9, vmax=1.0)
+sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
+sm.set_array([])
+
+
+def c(x):
+    """Return the RGBA color that the module-level ScalarMappable ``sm`` assigns to ``x``."""
+    return sm.to_rgba(x)
+
+
+def main():
+    args = docopt(__doc__)
+    config_file = args["<yaml-config>"] or "config/wireframe.yaml"
+    C.update(C.from_yaml(filename=config_file))
+    M.update(C.model)
+    pprint.pprint(C, indent=4)
+
+    random.seed(0)
+    np.random.seed(0)
+    torch.manual_seed(0)
+
+    device_name = "cpu"
+    os.environ["CUDA_VISIBLE_DEVICES"] = args["--devices"]
+    if torch.cuda.is_available():
+        device_name = "cuda"
+        torch.backends.cudnn.deterministic = True
+        torch.cuda.manual_seed(0)
+        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
+    else:
+        print("CUDA is not available")
+    device = torch.device(device_name)
+    checkpoint = torch.load(args["<checkpoint>"], map_location=device)
+
+    # Load model
+    model = lcnn.models.hg(
+        depth=M.depth,
+        head=lambda c_in, c_out: MultitaskHead(c_in, c_out),
+        num_stacks=M.num_stacks,
+        num_blocks=M.num_blocks,
+        num_classes=sum(sum(M.head_size, [])),
+    )
+    model = MultitaskLearner(model)
+    model = LineVectorizer(model)
+    model.load_state_dict(checkpoint["model_state_dict"])
+    model = model.to(device)
+    model.eval()
+
+    im = skimage.io.imread(args["<image>"])[:, :, :3]
+    im_resized = skimage.transform.resize(im, (512, 512)) * 255
+    image = (im_resized - M.image.mean) / M.image.stddev
+    image = torch.from_numpy(np.rollaxis(image, 2)[None].copy()).float()
+    with torch.no_grad():
+        input_dict = {
+            "image": image.to(device),
+            "meta": [
+                {
+                    "junc": torch.zeros(1, 2).to(device),
+                    "jtyp": torch.zeros(1, dtype=torch.uint8).to(device),
+                    "Lpos": torch.zeros(2, 2, dtype=torch.uint8).to(device),
+                    "Lneg": torch.zeros(2, 2, dtype=torch.uint8).to(device),
+                }
+            ],
+            "target": {
+                "jmap": torch.zeros([1, 1, 128, 128]).to(device),
+                "joff": torch.zeros([1, 1, 2, 128, 128]).to(device),
+            },
+            "do_evaluation": True,
+        }
+        H = model(input_dict)["preds"]
+
+    lines = H["lines"][0].cpu().numpy() / 128 * im.shape[:2]
+    scores = H["score"][0].cpu().numpy()
+    for i in range(1, len(lines)):
+        if (lines[i] == lines[0]).all():
+            lines = lines[:i]
+            scores = scores[:i]
+            break
+
+    # postprocess lines to remove overlapped lines
+    diag = (im.shape[0] ** 2 + im.shape[1] ** 2) ** 0.5
+    nlines, nscores = postprocess(lines, scores, diag * 0.01, 0, False)
+
+    plt.gca().set_axis_off()
+    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
+    plt.margins(0, 0)
+    plt.gca().xaxis.set_major_locator(plt.NullLocator())
+    plt.gca().yaxis.set_major_locator(plt.NullLocator())
+    for i, t in enumerate([0.95, 0.96, 0.97, 0.98, 0.99]):
+        for (a, b), s in zip(nlines, nscores):
+            if s < t:
+                continue
+            plt.plot([a[1], b[1]], [a[0], b[0]], c=c(s), linewidth=2, zorder=s)
+            plt.scatter(a[1], a[0], **PLTOPTS)
+            plt.scatter(b[1], b[0], **PLTOPTS)
+        plt.imshow(im)
+        plt.show()
+
+
+if __name__ == "__main__":
+    main()

+ 1 - 1
eval-APH.py

@@ -5,7 +5,7 @@ Usage:
     eval-APH.py (-h | --help )
 
 Examples:
-    python eval-APH.py logs/*
+    ./eval-APH.py post/RUN-ITERATION/0_010 post/RUN-ITERATION/0_010-APH
 
 Arguments:
     <src>                Source directory that stores preprocessed npz

+ 16 - 16
lcnn/models/line_vectorizer.py

@@ -1,11 +1,10 @@
-import random
 import itertools
+import random
 from collections import defaultdict
 
 import numpy as np
 import torch
 import torch.nn as nn
-import torch.nn.parallel
 import torch.nn.functional as F
 
 from lcnn.config import M
@@ -96,18 +95,6 @@ class LineVectorizer(nn.Module):
         x = torch.cat([x, f], 1)
         x = self.fc2(x).flatten()
 
-        def sum_batch(x):
-            xs = [x[idx[i] : idx[i + 1]].sum()[None] for i in range(n_batch)]
-            return torch.cat(xs)
-
-        loss = self.loss(x, y)
-        lpos_mask, lneg_mask = y, 1 - y
-        loss_lpos, loss_lneg = loss * lpos_mask, loss * lneg_mask
-        lpos = sum_batch(loss_lpos) / sum_batch(lpos_mask).clamp(min=1)
-        lneg = sum_batch(loss_lneg) / sum_batch(lneg_mask).clamp(min=1)
-        result["losses"][0]["lpos"] = lpos * M.loss_weight["lpos"]
-        result["losses"][0]["lneg"] = lneg * M.loss_weight["lneg"]
-
         if input_dict["do_evaluation"]:
             p = torch.cat(ps)
             s = torch.sigmoid(x)
@@ -142,8 +129,21 @@ class LineVectorizer(nn.Module):
                     [jcs[i][1] for i in range(n_batch)]
                 )
         else:
-            if "preds" in result:
-                del result["preds"]
+            y = torch.cat(ys)
+            loss = self.loss(x, y)
+            lpos_mask, lneg_mask = y, 1 - y
+            loss_lpos, loss_lneg = loss * lpos_mask, loss * lneg_mask
+
+            def sum_batch(x):
+                xs = [x[idx[i] : idx[i + 1]].sum()[None] for i in range(n_batch)]
+                return torch.cat(xs)
+
+            lpos = sum_batch(loss_lpos) / sum_batch(lpos_mask).clamp(min=1)
+            lneg = sum_batch(loss_lneg) / sum_batch(lneg_mask).clamp(min=1)
+            result["losses"][0]["lpos"] = lpos * M.loss_weight["lpos"]
+            result["losses"][0]["lneg"] = lneg * M.loss_weight["lneg"]
+            del result["preds"]
+
         return result
 
     def sample_lines(self, meta, jmap, joff, do_evaluation):

+ 3 - 1
lcnn/models/multitask_learner.py

@@ -37,7 +37,7 @@ class MultitaskLearner(nn.Module):
         self.num_class = sum(sum(head_size, []))
         self.head_off = np.cumsum([sum(h) for h in head_size])
 
-    def forward(self, input_dict, output_feature=True):
+    def forward(self, input_dict):
         image = input_dict["image"]
         outputs, feature = self.backbone(image)
         result = {"feature": feature}
@@ -66,6 +66,8 @@ class MultitaskLearner(nn.Module):
                     "lmap": lmap.sigmoid(),
                     "joff": joff.permute(2, 0, 1, 3, 4).sigmoid() - 0.5,
                 }
+                if input_dict["do_evaluation"]:
+                    return result
 
             L = OrderedDict()
             L["jmap"] = sum(

+ 77 - 0
lcnn/postprocess.py

@@ -0,0 +1,77 @@
+import numpy as np
+
+
+def pline(x1, y1, x2, y2, x, y):
+    """Return the squared distance from (x, y) to the infinite line through (x1, y1) and (x2, y2).
+
+    The projection parameter ``u`` is not clamped, so the foot of the
+    perpendicular may lie outside the segment.
+    """
+    px = x2 - x1
+    py = y2 - y1
+    dd = px * px + py * py
+    # Clamp the denominator so that a zero-length segment does not divide by zero.
+    u = ((x - x1) * px + (y - y1) * py) / max(1e-9, float(dd))
+    dx = x1 + u * px - x
+    dy = y1 + u * py - y
+    return dx * dx + dy * dy
+
+
+def psegment(x1, y1, x2, y2, x, y):
+    px = x2 - x1
+    py = y2 - y1
+    dd = px * px + py * py
+    u = max(min(((x - x1) * px + (y - y1) * py) / float(dd), 1), 0)
+    dx = x1 + u * px - x
+    dy = y1 + u * py - y
+    return dx * dx + dy * dy
+
+
+def plambda(x1, y1, x2, y2, x, y):
+    """Return the projection parameter of (x, y) on the line through (x1, y1) and (x2, y2).
+
+    The value is 0 at (x1, y1) and 1 at (x2, y2); it is not clamped, so it
+    can fall outside [0, 1].
+    """
+    px = x2 - x1
+    py = y2 - y1
+    dd = px * px + py * py
+    # Clamp the denominator so that a zero-length segment does not divide by zero.
+    return ((x - x1) * px + (y - y1) * py) / max(1e-9, float(dd))
+
+
+def postprocess(lines, scores, threshold=0.01, tol=1e9, do_clip=False):
+    """Drop or shorten lines that overlap lines accepted earlier in the sequence.
+
+    Each candidate is compared only against the lines already accepted, so
+    earlier entries take priority.
+    NOTE(review): presumably callers pass the lines sorted by descending
+    score -- confirm against the callers.
+
+    Args:
+        lines: Iterable of segments, each unpackable into two 2-D endpoints
+            ``(p, q)`` that support array arithmetic.
+        scores: Iterable of scores, aligned with ``lines``.
+        threshold: Distance (same unit as the coordinates) below which two
+            lines are considered to lie on top of each other.
+        tol: Amount by which the projected interval of an accepted line is
+            widened on both sides, expressed in the candidate's projection
+            parameter (fractions of the candidate's length).
+        do_clip: Accepted but never read by this function.
+
+    Returns:
+        Tuple ``(np.array(nlines), np.array(nscores))`` with the surviving
+        (possibly shortened) segments and their scores.
+    """
+    nlines, nscores = [], []
+    for (p, q), score in zip(lines, scores):
+        # [start, end] is the part of the candidate p -> q that is still kept,
+        # expressed in the projection parameter (0 at p, 1 at q).
+        start, end = 0, 1
+        for a, b in nlines:
+            # Skip accepted lines that are not close to the candidate: either
+            # both endpoints of (a, b) lie near the line through p and q, or
+            # both endpoints of (p, q) lie near the line through a and b.
+            if (
+                min(
+                    max(pline(*p, *q, *a), pline(*p, *q, *b)),
+                    max(pline(*a, *b, *p), pline(*a, *b, *q)),
+                )
+                > threshold ** 2
+            ):
+                continue
+            # Interval covered by (a, b) along the candidate, ordered and
+            # widened by tol on both sides.
+            lambda_a = plambda(*p, *q, *a)
+            lambda_b = plambda(*p, *q, *b)
+            if lambda_a > lambda_b:
+                lambda_a, lambda_b = lambda_b, lambda_a
+            lambda_a -= tol
+            lambda_b += tol
+
+            # case 1: the accepted line lies strictly inside the kept part;
+            # the candidate is left untouched (do_clip is not consulted here)
+            if start < lambda_a and lambda_b < end:
+                continue
+
+            # not intersect
+            if lambda_b < start or lambda_a > end:
+                continue
+
+            # cover: the kept part is entirely inside the accepted line, so
+            # mark the candidate as rejected (start = 10 forces start >= end)
+            if lambda_a <= start and end <= lambda_b:
+                start = 10
+                break
+
+            # case 2 & 3: partial overlap at one end, trim that end
+            if lambda_a <= start and start <= lambda_b:
+                start = lambda_b
+            if lambda_a <= end and end <= lambda_b:
+                end = lambda_a
+
+            if start >= end:
+                break
+
+        # Nothing of the candidate is left after trimming.
+        if start >= end:
+            continue
+        nlines.append(np.array([p + (q - p) * start, p + (q - p) * end]))
+        nscores.append(score)
+    return np.array(nlines), np.array(nscores)

+ 7 - 7
lcnn/trainer.py

@@ -1,23 +1,23 @@
-import os
-import time
 import atexit
+import os
+import os.path as osp
 import shutil
 import signal
-import os.path as osp
-import threading
 import subprocess
+import threading
+import time
 from timeit import default_timer as timer
 
-import numpy as np
-import torch
 import matplotlib as mpl
 import matplotlib.pyplot as plt
+import numpy as np
+import torch
 import torch.nn.functional as F
 from skimage import io
 from tensorboardX import SummaryWriter
 
-from lcnn.utils import recursive_to
 from lcnn.config import C
+from lcnn.utils import recursive_to
 
 
 class Trainer(object):

+ 6 - 81
post.py

@@ -18,20 +18,22 @@ Options:
                                      [default: 0.006,0.010,0.015]
 """
 
-import os
-import sys
 import glob
 import math
+import os
 import os.path as osp
+import sys
 
 import cv2
-import numpy as np
 import matplotlib as mpl
 import matplotlib.pyplot as plt
+import numpy as np
 from docopt import docopt
 
+from lcnn.postprocess import postprocess
 from lcnn.utils import parmap
 
+PLTOPTS = {"color": "#33FFFF", "s": 1.2, "edgecolors": "none", "zorder": 5}
 cmap = plt.get_cmap("jet")
 norm = mpl.colors.Normalize(vmin=0.92, vmax=1.02)
 sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
@@ -58,82 +60,6 @@ def imshow(im):
     plt.imshow(im)
 
 
-def pline(x1, y1, x2, y2, x, y):
-    px = x2 - x1
-    py = y2 - y1
-    dd = px * px + py * py
-    u = ((x - x1) * px + (y - y1) * py) / max(1e-9, float(dd))
-    dx = x1 + u * px - x
-    dy = y1 + u * py - y
-    return dx * dx + dy * dy
-
-
-def psegment(x1, y1, x2, y2, x, y):
-    px = x2 - x1
-    py = y2 - y1
-    dd = px * px + py * py
-    u = max(min(((x - x1) * px + (y - y1) * py) / float(dd), 1), 0)
-    dx = x1 + u * px - x
-    dy = y1 + u * py - y
-    return dx * dx + dy * dy
-
-
-def plambda(x1, y1, x2, y2, x, y):
-    px = x2 - x1
-    py = y2 - y1
-    dd = px * px + py * py
-    return ((x - x1) * px + (y - y1) * py) / max(1e-9, float(dd))
-
-
-def process(lines, scores, threshold=0.01, tol=1e9, do_clip=False):
-    nlines, nscores = [], []
-    for (p, q), score in zip(lines, scores):
-        start, end = 0, 1
-        for a, b in nlines:
-            if (
-                min(
-                    max(pline(*p, *q, *a), pline(*p, *q, *b)),
-                    max(pline(*a, *b, *p), pline(*a, *b, *q)),
-                )
-                > threshold ** 2
-            ):
-                continue
-            lambda_a = plambda(*p, *q, *a)
-            lambda_b = plambda(*p, *q, *b)
-            if lambda_a > lambda_b:
-                lambda_a, lambda_b = lambda_b, lambda_a
-            lambda_a -= tol
-            lambda_b += tol
-
-            # case 1: skip (if not do_clip)
-            if start < lambda_a and lambda_b < end:
-                continue
-
-            # not intersect
-            if lambda_b < start or lambda_a > end:
-                continue
-
-            # cover
-            if lambda_a <= start and end <= lambda_b:
-                start = 10
-                break
-
-            # case 2 & 3:
-            if lambda_a <= start and start <= lambda_b:
-                start = lambda_b
-            if lambda_a <= end and end <= lambda_b:
-                end = lambda_a
-
-            if start >= end:
-                break
-
-        if start >= end:
-            continue
-        nlines.append(np.array([p + (q - p) * start, p + (q - p) * end]))
-        nscores.append(score)
-    return np.array(nlines), np.array(nscores)
-
-
 def main():
     args = docopt(__doc__)
 
@@ -167,13 +93,12 @@ def main():
         diag = (im.shape[0] ** 2 + im.shape[1] ** 2) ** 0.5
 
         for threshold in thresholds:
-            nlines, nscores = process(lines, scores, diag * threshold, 0, False)
+            nlines, nscores = postprocess(lines, scores, diag * threshold, 0, False)
 
             outdir = osp.join(prefix, f"{threshold:.3f}".replace(".", "_"))
             os.makedirs(outdir, exist_ok=True)
             npz_name = osp.join(outdir, osp.split(fname)[-1])
 
-            PLTOPTS = {"color": "#33FFFF", "s": 1.2, "edgecolors": "none", "zorder": 5}
             if args["--plot"]:
                 # plot gt
                 imshow(im[:, :, ::-1])

+ 8 - 8
train.py

@@ -13,23 +13,23 @@ Options:
    -i --identifier <identifier>    Folder identifier [default: default-identifier]
 """
 
-import os
-import sys
+import datetime
 import glob
-import shlex
+import os
+import os.path as osp
+import platform
 import pprint
 import random
+import shlex
 import shutil
 import signal
-import os.path as osp
-import datetime
-import platform
-import threading
 import subprocess
+import sys
+import threading
 
-import yaml
 import numpy as np
 import torch
+import yaml
 from docopt import docopt
 
 import lcnn