[GRASS-SVN] r73132 - in grass-addons/grass7/imagery/i.ann.maskrcnn: i.ann.maskrcnn.train maskrcnnlib

svn_grass at osgeo.org svn_grass at osgeo.org
Tue Aug 21 12:41:15 PDT 2018


Author: pesekon2
Date: 2018-08-21 12:41:15 -0700 (Tue, 21 Aug 2018)
New Revision: 73132

Modified:
   grass-addons/grass7/imagery/i.ann.maskrcnn/i.ann.maskrcnn.train/i.ann.maskrcnn.train.py
   grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py
   grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py
   grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py
Log:
allow modifying FPN size, parameter upgrades
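
The two new settings introduced in config.py below make previously
hard-coded FPN widths configurable. A minimal sketch of overriding them,
assuming the config class in maskrcnnlib/config.py is importable as shown
("ModelConfig" is an illustrative name, not confirmed by this diff):

    # Hypothetical subclass; shrink both FPN widths.
    from maskrcnnlib.config import ModelConfig  # illustrative class name

    class SlimConfig(ModelConfig):
        # narrower FC layers in the classifier head (commit default: 1024)
        FPN_CLASSIF_FC_LAYERS_SIZE = 512
        # narrower top-down pyramid layers (commit default: 256)
        TOP_DOWN_PYRAMID_SIZE = 128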

Modified: grass-addons/grass7/imagery/i.ann.maskrcnn/i.ann.maskrcnn.train/i.ann.maskrcnn.train.py
===================================================================
--- grass-addons/grass7/imagery/i.ann.maskrcnn/i.ann.maskrcnn.train/i.ann.maskrcnn.train.py	2018-08-21 16:57:50 UTC (rev 73131)
+++ grass-addons/grass7/imagery/i.ann.maskrcnn/i.ann.maskrcnn.train/i.ann.maskrcnn.train.py	2018-08-21 19:41:15 UTC (rev 73132)
@@ -40,7 +40,7 @@
 #% label: Path to the dataset with images and masks
 #% required: yes
 #%end
-#%option
+#%option G_OPT_F_INPUT
 #% key: model
 #% type: string
 #% label: Path to the .h5 file to use as initial values
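
Switching to the standardized G_OPT_F_INPUT preset gives the option
GRASS's stock behaviour for existing input files (a file browser in the
GUI and a gisprompt of old,file,file), so only the deviating fields need
restating. Roughly, the block above expands to something like the
following sketch, with the preset's defaults paraphrased rather than
quoted from the GRASS parser:

    #%option
    #% key: model
    #% type: string
    #% label: Path to the .h5 file to use as initial values
    #% gisprompt: old,file,file
    #%end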

Modified: grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py
===================================================================
--- grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py	2018-08-21 16:57:50 UTC (rev 73131)
+++ grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py	2018-08-21 19:41:15 UTC (rev 73132)
@@ -31,6 +31,12 @@
     # are based on a Resnet101 backbone.
     BACKBONE_STRIDES = [4, 8, 16, 32, 64]
 
+    # Size of the fully-connected layers in the classification graph
+    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
+
+    # Size of the top-down layers used to build the feature pyramid
+    TOP_DOWN_PYRAMID_SIZE = 256
+
     ## RPN ##
     # Length of square anchor side in pixels
     RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
@@ -183,18 +189,26 @@
         self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
 
         # Set the backbone architecture
+        # Use a predefined one or provide a callable with the signature
+        # of model.resnet_graph. If you do so, you need to supply a
+        # callable to COMPUTE_BACKBONE_SHAPE as well
         self.BACKBONE = backbone
 
+        # Only useful if you supply a callable to BACKBONE. Should compute
+        # the shape of each layer of the FPN pyramid.
+        # See model.compute_backbone_shapes
+        self.COMPUTE_BACKBONE_SHAPE = None
+
         # Input image size
         self.IMAGE_SHAPE = np.array(
             [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
 
         # Compute backbone size from input image size
         self.BACKBONE_SHAPES = np.array(
             [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
               int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
              for stride in self.BACKBONE_STRIDES])
 
         # Train or freeze batch normalization layers
         #  None: Train BN layers in a normal mode
         #  False: Freeze BN layers (recommended for small batch size)
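
Together with the compute_backbone_shapes() change in model.py below,
this lets a user plug in a custom backbone. A hedged sketch of the
contract; my_backbone and my_backbone_shapes are illustrative names, not
part of the library:

    import math
    import numpy as np

    def my_backbone(input_image, stage5=True, train_bn=None):
        """Must mimic model.resnet_graph: return the last layer of each
        of the five stages, [C1, C2, C3, C4, C5]."""
        raise NotImplementedError  # build and return the stage outputs

    def my_backbone_shapes(image_shape):
        """Same contract as the ResNet branch of compute_backbone_shapes:
        one [height, width] pair per backbone stride."""
        strides = [4, 8, 16, 32, 64]  # matches BACKBONE_STRIDES
        return np.array([[int(math.ceil(image_shape[0] / s)),
                          int(math.ceil(image_shape[1] / s))]
                         for s in strides])

    # then: config.BACKBONE = my_backbone
    #       config.COMPUTE_BACKBONE_SHAPE = my_backbone_shapes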

Modified: grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py
===================================================================
--- grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py	2018-08-21 16:57:50 UTC (rev 73131)
+++ grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py	2018-08-21 19:41:15 UTC (rev 73132)
@@ -78,6 +78,9 @@
     Returns:
         [N, (height, width)]. Where N is the number of stages
     """
+    if callable(config.BACKBONE):
+        return config.COMPUTE_BACKBONE_SHAPE(image_shape)
+
     # Currently supports ResNet only
     assert config.BACKBONE in ["resnet50", "resnet101"]
     return np.array(
@@ -565,7 +568,11 @@
 
     # Assign positive ROIs to GT boxes.
     positive_overlaps = tf.gather(overlaps, positive_indices)
-    roi_gt_box_assignment = tf.argmax(positive_overlaps, axis=1)
+    roi_gt_box_assignment = tf.cond(
+        tf.greater(tf.shape(positive_overlaps)[1], 0),
+        true_fn=lambda: tf.argmax(positive_overlaps, axis=1),
+        false_fn=lambda: tf.cast(tf.constant([]), tf.int64)
+    )
     roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
     roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)
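
The tf.cond() above guards tf.argmax() against a tensor with zero
columns, which occurs when an image has no ground-truth boxes. The
failure mode is easy to reproduce with a NumPy analogue of the same
logic (a sketch, not the TF code itself):

    import numpy as np

    overlaps = np.zeros((5, 0))  # 5 positive ROIs, zero GT boxes
    # np.argmax(overlaps, axis=1) would raise ValueError on the empty axis
    assignment = (np.argmax(overlaps, axis=1) if overlaps.shape[1] > 0
                  else np.empty((0,), dtype=np.int64))
    print(assignment.shape, assignment.dtype)  # (0,) int64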
 
@@ -899,7 +906,8 @@
 ############################################################
 
 def fpn_classifier_graph(rois, feature_maps, image_meta,
-                         pool_size, num_classes, train_bn=True):
+                         pool_size, num_classes, train_bn=True,
+                         fc_layers_size=1024):
     """Builds the computation graph of the feature pyramid network classifier
     and regressor heads.
 
@@ -911,6 +919,7 @@
     pool_size: The width of the square feature map generated from ROI Pooling.
     num_classes: number of classes, which determines the depth of the results
     train_bn: Boolean. Train or freeze Batch Norm layers
+    fc_layers_size: Size of the 2 FC layers
 
     Returns:
         logits: [N, NUM_CLASSES] classifier logits (before softmax)
@@ -924,11 +933,11 @@
                         name="roi_align_classifier")(
                             [rois, image_meta] + feature_maps)
     # Two fc_layers_size FC layers (implemented with Conv2D for consistency)
-    x = KL.TimeDistributed(KL.Conv2D(1024, (pool_size, pool_size), padding="valid"),
+    x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
                            name="mrcnn_class_conv1")(x)
     x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn1')(x, training=train_bn)
     x = KL.Activation('relu')(x)
-    x = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)),
+    x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
                            name="mrcnn_class_conv2")(x)
     x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn2')(x, training=train_bn)
     x = KL.Activation('relu')(x)
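
A Conv2D with kernel size pool_size and "valid" padding collapses a
pool_size x pool_size feature map to 1x1, so each of these layers is
arithmetically a fully connected layer of width fc_layers_size. A NumPy
shape sketch of that equivalence for a single ROI (zero weights, shapes
only):

    import numpy as np

    pool_size, channels, fc_layers_size = 7, 256, 1024
    roi = np.zeros((pool_size, pool_size, channels))   # one pooled ROI
    fc_weights = np.zeros((fc_layers_size, roi.size))  # flattened conv kernel
    out = fc_weights @ roi.reshape(-1)                 # == the 1x1 conv output
    print(out.shape)                                   # (1024,)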
@@ -1903,25 +1912,29 @@
         # Bottom-up Layers
         # Returns a list of the last layers of each stage, 5 in total.
         # Don't create the head (stage 5), so we pick the 4th item in the list.
-        _, C2, C3, C4, C5 = resnet_graph(input_image, config.BACKBONE,
-                                         stage5=True, train_bn=config.TRAIN_BN)
+        if callable(config.BACKBONE):
+            _, C2, C3, C4, C5 = config.BACKBONE(input_image, stage5=True,
+                                                train_bn=config.TRAIN_BN)
+        else:
+            _, C2, C3, C4, C5 = resnet_graph(input_image, config.BACKBONE,
+                                             stage5=True, train_bn=config.TRAIN_BN)
         # Top-down Layers
         # TODO: add assert to verify feature map sizes match what's in config
-        P5 = KL.Conv2D(256, (1, 1), name='fpn_c5p5')(C5)
+        P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5)
         P4 = KL.Add(name="fpn_p4add")([
             KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
-            KL.Conv2D(256, (1, 1), name='fpn_c4p4')(C4)])
+            KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)])
         P3 = KL.Add(name="fpn_p3add")([
             KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
-            KL.Conv2D(256, (1, 1), name='fpn_c3p3')(C3)])
+            KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)])
         P2 = KL.Add(name="fpn_p2add")([
             KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
-            KL.Conv2D(256, (1, 1), name='fpn_c2p2')(C2)])
+            KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)])
         # Attach 3x3 conv to all P layers to get the final feature maps.
-        P2 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p2")(P2)
-        P3 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p3")(P3)
-        P4 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p4")(P4)
-        P5 = KL.Conv2D(256, (3, 3), padding="SAME", name="fpn_p5")(P5)
+        P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2")(P2)
+        P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3")(P3)
+        P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4")(P4)
+        P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5")(P5)
         # P6 is used for the 5th anchor scale in RPN. Generated by
         # subsampling from P5 with stride of 2.
         P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)
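
The lateral 1x1 projections and the final 3x3 convs now take their width
from config.TOP_DOWN_PYRAMID_SIZE instead of a hard-coded 256; the
top-down merge itself is unchanged. A NumPy sketch of one merge step,
with the conv projections stubbed out as zero arrays:

    import numpy as np

    def upsample2x(x):
        # nearest-neighbour, matching KL.UpSampling2D(size=(2, 2))
        return x.repeat(2, axis=0).repeat(2, axis=1)

    d = 256                             # TOP_DOWN_PYRAMID_SIZE
    P5 = np.zeros((8, 8, d))            # fpn_c5p5 output
    C4_lateral = np.zeros((16, 16, d))  # fpn_c4p4 output
    P4 = upsample2x(P5) + C4_lateral    # the fpn_p4add merge
    print(P4.shape)                     # (16, 16, 256)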
@@ -1944,7 +1957,7 @@
 
         # RPN Model
         rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE,
-                              len(config.RPN_ANCHOR_RATIOS), 256)
+                              len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE)
         # Loop through pyramid layers
         layer_outputs = []  # list of lists
         for p in rpn_feature_maps:
@@ -1999,15 +2012,12 @@
             # Network Heads
             # TODO: verify that this handles zero padded ROIs
             mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
-                fpn_classifier_graph(rois,
-                                     mrcnn_feature_maps,
-                                     input_image_meta,
-                                     config.POOL_SIZE,
-                                     config.NUM_CLASSES,
-                                     train_bn=config.TRAIN_BN)
+                fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta,
+                                     config.POOL_SIZE, config.NUM_CLASSES,
+                                     train_bn=config.TRAIN_BN,
+                                     fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
 
-            mrcnn_mask = build_fpn_mask_graph(rois,
-                                              mrcnn_feature_maps,
+            mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps,
                                               input_image_meta,
                                               config.MASK_POOL_SIZE,
                                               config.NUM_CLASSES,
@@ -2042,12 +2052,10 @@
             # Network Heads
             # Proposal classifier and BBox regressor heads
             mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
-                fpn_classifier_graph(rpn_rois,
-                                     mrcnn_feature_maps,
-                                     input_image_meta,
-                                     config.POOL_SIZE,
-                                     config.NUM_CLASSES,
-                                     train_bn=config.TRAIN_BN)
+                fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta,
+                                     config.POOL_SIZE, config.NUM_CLASSES,
+                                     train_bn=config.TRAIN_BN,
+                                     fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
 
             # Detections
             # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in
@@ -2080,8 +2088,7 @@
         """Finds the last checkpoint file of the last trained model in the
         model directory.
         Returns:
-            log_dir: The directory where events and weights are saved
-            checkpoint_path: the path to the last checkpoint file
+            The path of the last checkpoint file
         """
         # Get directory names. Each directory corresponds to a model
         dir_names = next(os.walk(self.model_dir))[1]
@@ -2089,7 +2096,11 @@
         dir_names = filter(lambda f: f.startswith(key), dir_names)
         dir_names = sorted(dir_names)
         if not dir_names:
-            return None, None
+            import errno
+            raise FileNotFoundError(
+                errno.ENOENT,
+                "Could not find model directory under {}".format(
+                    self.model_dir))
         # Pick last directory
         dir_name = os.path.join(self.model_dir, dir_names[-1])
         # Find the last checkpoint
@@ -2097,9 +2108,13 @@
         checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints)
         checkpoints = sorted(checkpoints)
         if not checkpoints:
-            return dir_name, None
+            import errno
+            raise FileNotFoundError(
+                errno.ENOENT,
+                "Could not find weight files in {}".format(
+                    dir_name))
         checkpoint = os.path.join(dir_name, checkpoints[-1])
-        return dir_name, checkpoint
+        return checkpoint
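
Since find_last() now raises instead of returning None, and returns a
single path instead of a (log_dir, checkpoint) tuple, callers need a
small adaptation. A hedged sketch; "model" stands for a MaskRCNN
instance:

    def last_checkpoint_or_none(model):
        """Return the newest checkpoint path, or None if none exists."""
        try:
            return model.find_last()   # was: dir_name, path = find_last()
        except FileNotFoundError:
            return None                # e.g. train from fresh weights

    # checkpoint = last_checkpoint_or_none(model)
    # if checkpoint:
    #     model.load_weights(checkpoint, by_name=True)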
 
     def load_weights(self, filepath, by_name=False, exclude=None):
         """Modified version of the correspoding Keras function with
@@ -2255,7 +2270,7 @@
             # Continue from where we left off. Get epoch and date from the file name
             # A sample model path might look like:
             # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5
-            regex = r".*/\w+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_\w+(\d{4})\.h5"
+            regex = r".*/[\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_[\w-]+(\d{4})\.h5"
             m = re.match(regex, model_path)
             if m:
                 now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
@@ -2263,11 +2278,16 @@
                 # Epoch number in file is 1-based, and in Keras code it's 0-based.
                 # So, adjust for that then increment by one to start from the next epoch
                 self.epoch = int(m.group(6))
+                print('Re-starting from epoch %d' % self.epoch)
 
         # Directory for training logs
         self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format(
             self.config.NAME.lower(), now))
 
+        # Create log_dir if it does not exist
+        if not os.path.exists(self.log_dir):
+            os.makedirs(self.log_dir)
+
         # Path to save after each epoch. Include placeholders that get filled by Keras.
         self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.h5".format(
             self.config.NAME.lower()))
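
The widened character classes ([\w-]+ instead of \w+) let set_log_dir()
parse checkpoint paths whose model name contains hyphens. A quick check
of the updated pattern with an illustrative path:

    import re

    regex = r".*/[\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_[\w-]+(\d{4})\.h5"
    path = "/logs/my-model20180821T1241/mask_rcnn_my-model_0005.h5"
    m = re.match(regex, path)
    print(m.group(6) if m else "no match")  # 0005; the old \w+ pattern fails here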
@@ -2286,7 +2306,7 @@
         layers: Allows selecting which layers to train. It can be:
             - A regular expression to match layer names to train
             - One of these predefined values:
-              heaads: The RPN, classifier and mask heads of the network
+              heads: The RPN, classifier and mask heads of the network
               all: All the layers
               3+: Train Resnet stage 3 and up
               4+: Train Resnet stage 4 and up
@@ -2463,7 +2483,7 @@
             full_mask = utils.unmold_mask(masks[i], boxes[i], original_image_shape)
             full_masks.append(full_mask)
         full_masks = np.stack(full_masks, axis=-1)\
-            if full_masks else np.empty(masks.shape[1:3] + (0,))
+            if full_masks else np.empty(original_image_shape[:2] + (0,))
 
         return boxes, class_ids, scores, full_masks
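
The changed fallback only matters when nothing was detected:
masks.shape[1:3] is the mini-mask size (e.g. 28x28), so the old branch
returned an empty array with the wrong spatial dimensions, while the new
one matches the original image. A shape sketch under those assumptions:

    import numpy as np

    masks = np.zeros((0, 28, 28))           # zero detections, 28x28 masks
    original_image_shape = (1024, 1024, 3)

    old = np.empty(masks.shape[1:3] + (0,))           # (28, 28, 0), wrong
    new = np.empty(original_image_shape[:2] + (0,))   # (1024, 1024, 0)
    print(old.shape, new.shape)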
 

Modified: grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py
===================================================================
--- grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py	2018-08-21 16:57:50 UTC (rev 73131)
+++ grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py	2018-08-21 19:41:15 UTC (rev 73132)
@@ -25,7 +25,7 @@
 import warnings
 
 # URL from which to download the latest COCO trained weights
-# https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5
+# COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
 
 
 ############################################################
@@ -102,7 +102,11 @@
     '''Computes IoU overlaps between two sets of masks.
     masks1, masks2: [Height, Width, instances]
     '''
-    # flatten masks
+
+    # If either set of masks is empty, return an empty result
+    if masks1.shape[-1] == 0 or masks2.shape[-1] == 0:
+        return np.zeros((masks1.shape[-1], masks2.shape[-1]))
+    # flatten masks and compute their areas
     masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
     masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
     area1 = np.sum(masks1, axis=0)
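
With the guard in place, the function degrades gracefully when either
side has no instances. A self-contained sketch of the whole IoU
computation for masks shaped [Height, Width, instances]; the
intersection/union step mirrors the standard Mask R-CNN implementation
and is not part of the hunk shown above:

    import numpy as np

    def mask_iou(masks1, masks2):
        # empty guard, as above: no instances -> empty overlaps matrix
        if masks1.shape[-1] == 0 or masks2.shape[-1] == 0:
            return np.zeros((masks1.shape[-1], masks2.shape[-1]))
        m1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
        m2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
        area1, area2 = m1.sum(axis=0), m2.sum(axis=0)
        intersections = m1.T @ m2
        union = area1[:, None] + area2[None, :] - intersections
        return intersections / union

    a = np.ones((4, 4, 1))
    print(mask_iou(a, a))                         # [[1.]]
    print(mask_iou(a, np.ones((4, 4, 0))).shape)  # (1, 0)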


