[GRASS-SVN] r73496 - grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib
svn_grass at osgeo.org
Mon Oct 8 02:24:30 PDT 2018
Author: pesekon2
Date: 2018-10-08 02:24:30 -0700 (Mon, 08 Oct 2018)
New Revision: 73496
Modified:
grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py
grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py
grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py
Log:
prepare for multispectral images, fix shape of fpn_classifier
Modified: grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py
===================================================================
--- grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py 2018-10-07 21:39:59 UTC (rev 73495)
+++ grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/config.py 2018-10-08 09:24:30 UTC (rev 73496)
@@ -57,6 +57,9 @@
# How many anchors per image to use for RPN training
RPN_TRAIN_ANCHORS_PER_IMAGE = 256
+ # ROIs kept after tf.nn.top_k and before non-maximum suppression
+ PRE_NMS_LIMIT = 6000
+
# ROIs kept after non-maximum suppression (training and inference)
POST_NMS_ROIS_TRAINING = 2000
POST_NMS_ROIS_INFERENCE = 1000
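Together, these constants bound the proposal funnel in ProposalLayer: a
top-k cut at PRE_NMS_LIMIT first, then non-maximum suppression down to
POST_NMS_ROIS_*. A standalone sketch with toy values (the anchor count
and the 0.7 IoU threshold are illustrative assumptions, not values from
this diff):

    import numpy as np
    import tensorflow as tf

    # Toy stand-ins for one image's anchors.
    scores = tf.constant(np.random.rand(10000).astype(np.float32))
    boxes = tf.constant(np.random.rand(10000, 4).astype(np.float32))

    # Step 1: keep only the PRE_NMS_LIMIT highest-scoring anchors.
    pre_nms_limit = tf.minimum(6000, tf.shape(boxes)[0])
    ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True).indices
    scores = tf.gather(scores, ix)
    boxes = tf.gather(boxes, ix)

    # Step 2: NMS trims the survivors to at most POST_NMS_ROIS_*.
    keep = tf.image.non_max_suppression(
        boxes, scores, max_output_size=2000, iou_threshold=0.7)
    proposals = tf.gather(boxes, keep)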
@@ -132,7 +135,8 @@
trainROIsPerImage=64, stepsPerEpoch=1500,
miniMaskShape=None, validationSteps=100,
imageMaxDim=768, imageMinDim=768, backbone='resnet101',
- trainBatchNorm=False, resizeMode='square'):
+ trainBatchNorm=False, resizeMode='square',
+ image_channel_count=3):
"""Set values of attributes.
Written by Ondrej Pesek, but using attributes from Waleed Abdulla"""
@@ -142,7 +146,8 @@
# Number of images to train on each GPU
self.IMAGES_PER_GPU = imagesPerGPU
- # NUMBER OF GPUs to use. For CPU training, use 1
+ # NUMBER OF GPUs to use.
+ # When using only a CPU, this needs to be set to 1.
self.GPU_COUNT = GPUcount
# Number of classes (including background)
@@ -197,11 +202,16 @@
# Only useful if you supply a callable to BACKBONE. Should compute
# the shape of each layer of the FPN Pyramid.
# See model.compute_backbone_shapes
- COMPUTE_BACKBONE_SHAPE = None
+ self.COMPUTE_BACKBONE_SHAPE = None
+ # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
+ # Changing this requires other changes in the code. See the WIKI for more
+ # details: https://github.com/matterport/Mask_RCNN/wiki
+ self.IMAGE_CHANNEL_COUNT = image_channel_count
+
# Input image size
- self.IMAGE_SHAPE = np.array(
- [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])
+ self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
+ self.IMAGE_CHANNEL_COUNT])
# Compute backbone size from input image size
# TODO Ondrej Pesek: Maybe delete it and see Matterport's (avoid math
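With the channel count wired into IMAGE_SHAPE, a multispectral setup
reduces to one constructor argument. A hedged usage sketch (the class
name ModelConfig and the 5-band count are assumptions for illustration;
only the image_channel_count keyword comes from this commit):

    # Hypothetical 5-band configuration (e.g. RGB + NIR + elevation).
    config = ModelConfig(image_channel_count=5)  # class name assumed
    print(config.IMAGE_CHANNEL_COUNT)  # 5
    print(config.IMAGE_SHAPE)          # [768 768 5] with the default dims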
Modified: grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py
===================================================================
--- grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py 2018-10-07 21:39:59 UTC (rev 73495)
+++ grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/model.py 2018-10-08 09:24:30 UTC (rev 73496)
@@ -19,7 +19,6 @@
from collections import OrderedDict
import multiprocessing
import numpy as np
-import skimage.transform
import tensorflow as tf
import keras
import keras.backend as K
@@ -262,9 +261,9 @@
box refinement deltas to anchors.
Inputs:
- rpn_probs: [batch, anchors, (bg prob, fg prob)]
- rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
- anchors: [batch, (y1, x1, y2, x2)] anchors in normalized coordinates
+ rpn_probs: [batch, num_anchors, (bg prob, fg prob)]
+ rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))]
+ anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates
Returns:
Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)]
@@ -287,7 +286,7 @@
# Improve performance by trimming to top anchors by score
# and doing the rest on the smaller subset.
- pre_nms_limit = tf.minimum(6000, tf.shape(anchors)[1])
+ pre_nms_limit = tf.minimum(self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1])
ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True,
name="top_anchors").indices
scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y),
@@ -348,7 +347,7 @@
"""Implements ROI Pooling on multiple levels of the feature pyramid.
Params:
- - pool_shape: [height, width] of the output pooled regions. Usually [7, 7]
+ - pool_shape: [pool_height, pool_width] of the output pooled regions. Usually [7, 7]
Inputs:
- boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized
@@ -355,11 +354,11 @@
coordinates. Possibly padded with zeros if not enough
boxes to fill the array.
- image_meta: [batch, (meta data)] Image details. See compose_image_meta()
- - Feature maps: List of feature maps from different levels of the pyramid.
+ - feature_maps: List of feature maps from different levels of the pyramid.
Each is [batch, height, width, channels]
Output:
- Pooled regions in the shape: [batch, num_boxes, height, width, channels].
+ Pooled regions in the shape: [batch, num_boxes, pool_height, pool_width, channels].
The width and height are those specified in pool_shape in the layer
constructor.
"""
@@ -445,7 +444,8 @@
pooled = tf.gather(pooled, ix)
# Re-add the batch dimension
- pooled = tf.expand_dims(pooled, 0)
+ shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0)
+ pooled = tf.reshape(pooled, shape)
return pooled
def compute_output_shape(self, input_shape):
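The reshape matters because tf.expand_dims re-added a batch axis of
fixed size 1, which is only correct for single-image batches; the new
code recovers the true [batch, num_boxes, ...] layout. A toy comparison
(shapes assumed for illustration):

    import numpy as np
    import tensorflow as tf

    batch, num_boxes = 2, 3
    boxes = tf.constant(np.zeros((batch, num_boxes, 4), np.float32))
    pooled = tf.constant(np.zeros((batch * num_boxes, 7, 7, 256),
                                  np.float32))

    old = tf.expand_dims(pooled, 0)  # [1, 6, 7, 7, 256] -- loses batch
    shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0)
    new = tf.reshape(pooled, shape)  # [2, 3, 7, 7, 256]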
@@ -490,7 +490,7 @@
generates target class IDs, bounding box deltas, and masks for each.
Inputs:
- proposals: [N, (y1, x1, y2, x2)] in normalized coordinates. Might
+ proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates. Might
be zero padded if there are not enough proposals.
gt_class_ids: [MAX_GT_INSTANCES] int class IDs
gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates.
@@ -500,9 +500,8 @@
and masks.
rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates
class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded.
- deltas: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
- Class-specific bbox refinements.
- masks: [TRAIN_ROIS_PER_IMAGE, height, width). Masks cropped to bbox
+ deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
+ masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox
boundaries and resized to neural network output size.
Note: Returned arrays might be zero padded if not enough target ROIs.
@@ -537,7 +536,7 @@
# Compute overlaps matrix [proposals, gt_boxes]
overlaps = overlaps_graph(proposals, gt_boxes)
- # Compute overlaps with crowd boxes [anchors, crowds]
+ # Compute overlaps with crowd boxes [proposals, crowd_boxes]
crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
no_crowd_bool = (crowd_iou_max < 0.001)
@@ -640,10 +639,8 @@
rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized
coordinates
target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs.
- target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, NUM_CLASSES,
- (dy, dx, log(dh), log(dw), class_id)]
- Class-specific bbox refinements.
- target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width)
+ target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))]
+ target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]
Masks cropped to bbox boundaries and resized to neural
network output size.
@@ -673,7 +670,7 @@
def compute_output_shape(self, input_shape):
return [
(None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois
- (None, 1), # class_ids
+ (None, self.config.TRAIN_ROIS_PER_IMAGE), # class_ids
(None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas
(None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0],
self.config.MASK_SHAPE[1]) # masks
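Keras uses compute_output_shape to propagate static shapes past custom
layers, so declaring (None, 1) for a tensor that is really
[batch, TRAIN_ROIS_PER_IMAGE] mis-shapes everything downstream. A toy
layer showing the contract (not the real DetectionTargetLayer):

    import keras.engine as KE
    import tensorflow as tf

    class ToyTargets(KE.Layer):
        """Emits one class ID per ROI, so the declared output shape
        must be (batch, num_rois), not (batch, 1)."""
        def __init__(self, num_rois, **kwargs):
            super(ToyTargets, self).__init__(**kwargs)
            self.num_rois = num_rois

        def call(self, rois):  # rois: [batch, num_rois, 4]
            return tf.zeros(tf.shape(rois)[:2], tf.int32)

        def compute_output_shape(self, input_shape):
            return (None, self.num_rois)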
@@ -696,10 +693,10 @@
probs: [N, num_classes]. Class probabilities.
deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
bounding box deltas.
- window: (y1, x1, y2, x2) in image coordinates. The part of the image
+ window: (y1, x1, y2, x2) in normalized coordinates. The part of the image
that contains the image, excluding the padding.
- Returns detections shaped: [N, (y1, x1, y2, x2, class_id, score)] where
+ Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where
coordinates are normalized.
"""
# Class IDs per ROI
@@ -819,7 +816,7 @@
self.config.IMAGES_PER_GPU)
# Reshape output
- # [batch, num_detections, (y1, x1, y2, x2, class_score)] in
+ # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in
# normalized coordinates
return tf.reshape(
detections_batch,
@@ -842,9 +839,9 @@
every pixel in the feature map), or 2 (every other pixel).
Returns:
- rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax)
- rpn_probs: [batch, H, W, 2] Anchor classifier probabilities.
- rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be
+ rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax)
+ rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities.
+ rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be
applied to anchors.
"""
# TODO: check if stride of 2 causes alignment issues if the feature map
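The corrected docstring reflects that rpn_graph flattens the spatial
grid into a single anchor axis; with illustrative numbers:

    # One pyramid level, 32 x 32 feature map, 3 anchors per location
    # (toy numbers, not values from this diff):
    H, W, anchors_per_location = 32, 32, 3
    num_anchors = H * W * anchors_per_location  # 3072
    # rpn_class_logits: [batch, 3072, 2]
    # rpn_probs:        [batch, 3072, 2]
    # rpn_bbox:         [batch, 3072, 4]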
@@ -866,7 +863,7 @@
rpn_probs = KL.Activation(
"softmax", name="rpn_class_xxx")(rpn_class_logits)
- # Bounding box refinement. [batch, H, W, anchors per location, depth]
+ # Bounding box refinement. [batch, H, W, anchors per location * depth]
# where depth is [x, y, log(w), log(h)]
x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid",
activation='linear', name='rpn_bbox_pred')(shared)
@@ -888,9 +885,9 @@
depth: Depth of the backbone feature map.
Returns a Keras Model object. The model outputs, when called, are:
- rpn_logits: [batch, H, W, 2] Anchor classifier logits (before softmax)
- rpn_probs: [batch, W, W, 2] Anchor classifier probabilities.
- rpn_bbox: [batch, H, W, (dy, dx, log(dh), log(dw))] Deltas to be
+ rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax)
+ rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities.
+ rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be
applied to anchors.
"""
input_feature_map = KL.Input(shape=[None, None, depth],
@@ -913,7 +910,7 @@
coordinates.
feature_maps: List of feature maps from different layers of the pyramid,
[P2, P3, P4, P5]. Each has a different resolution.
- - image_meta: [batch, (meta data)] Image details. See compose_image_meta()
+ image_meta: [batch, (meta data)] Image details. See compose_image_meta()
pool_size: The width of the square feature map generated from ROI Pooling.
num_classes: number of classes, which determines the depth of the results
train_bn: Boolean. Train or freeze Batch Norm layers
@@ -920,13 +917,13 @@
fc_layers_size: Size of the 2 FC layers
Returns:
- logits: [N, NUM_CLASSES] classifier logits (before softmax)
- probs: [N, NUM_CLASSES] classifier probabilities
- bbox_deltas: [N, (dy, dx, log(dh), log(dw))] Deltas to apply to
+ logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax)
+ probs: [batch, num_rois, NUM_CLASSES] classifier probabilities
+ bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to
proposal boxes
"""
# ROI Pooling
- # Shape: [batch, num_boxes, pool_height, pool_width, channels]
+ # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
x = PyramidROIAlign([pool_size, pool_size],
name="roi_align_classifier")([rois, image_meta] + feature_maps)
# Two 1024 FC layers (implemented with Conv2D for consistency)
@@ -949,10 +946,10 @@
name="mrcnn_class")(mrcnn_class_logits)
# BBox head
- # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))]
+ # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
name='mrcnn_bbox_fc')(shared)
- # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))]
+ # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
s = K.int_shape(x)
mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x)
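The Reshape only regroups the flat per-class output; a toy illustration
in plain NumPy:

    import numpy as np

    num_rois, num_classes = 4, 3
    flat = np.zeros((1, num_rois, num_classes * 4))  # mrcnn_bbox_fc out
    mrcnn_bbox = flat.reshape(1, num_rois, num_classes, 4)
    # mrcnn_bbox[:, r, c] = (dy, dx, log(dh), log(dw)) for ROI r, class c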
@@ -972,10 +969,10 @@
num_classes: number of classes, which determines the depth of the results
train_bn: Boolean. Train or freeze Batch Norm layers
- Returns: Masks [batch, roi_count, height, width, num_classes]
+ Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES]
"""
# ROI Pooling
- # Shape: [batch, boxes, pool_height, pool_width, channels]
+ # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
x = PyramidROIAlign([pool_size, pool_size],
name="roi_align_mask")([rois, image_meta] + feature_maps)
@@ -1073,11 +1070,7 @@
target_bbox = batch_pack_graph(target_bbox, batch_counts,
config.IMAGES_PER_GPU)
- # TODO: use smooth_l1_loss() rather than reimplementing here
- # to reduce code duplication
- diff = K.abs(target_bbox - rpn_bbox)
- less_than_one = K.cast(K.less(diff, 1.0), "float32")
- loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
+ loss = smooth_l1_loss(target_bbox, rpn_bbox)
loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
return loss
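The deleted lines are exactly the smooth L1 formula, so the loss now
delegates to a single helper. For reference, a sketch consistent with
the removed code (the actual helper already lives elsewhere in
model.py):

    import keras.backend as K

    def smooth_l1_loss(y_true, y_pred):
        """Smooth L1: quadratic for |diff| < 1, linear beyond."""
        diff = K.abs(y_true - y_pred)
        less_than_one = K.cast(K.less(diff, 1.0), "float32")
        return (less_than_one * 0.5 * diff ** 2) + \
               (1 - less_than_one) * (diff - 0.5)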
@@ -1443,8 +1436,7 @@
gt_h = gt_y2 - gt_y1
# Resize mini mask to size of GT box
placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \
- np.round(skimage.transform.resize(
- class_mask, (gt_h, gt_w), order=1, mode="constant")).astype(bool)
+ np.round(utils.resize(class_mask, (gt_h, gt_w))).astype(bool)
# Place the mini batch in the placeholder
class_mask = placeholder
@@ -1451,7 +1443,7 @@
# Pick part of the mask and resize it
y1, x1, y2, x2 = rois[i].astype(np.int32)
m = class_mask[y1:y2, x1:x2]
- mask = skimage.transform.resize(m, config.MASK_SHAPE, order=1, mode="constant")
+ mask = utils.resize(m, config.MASK_SHAPE)
masks[i, :, :, class_id] = mask
return rois, roi_gt_class_ids, bboxes, masks
@@ -1877,7 +1869,7 @@
# Inputs
input_image = KL.Input(
- shape=[None, None, 3], name="input_image")
+ shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image")
input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE],
name="input_image_meta")
if mode == "training":
@@ -2275,8 +2267,9 @@
if model_path:
# Continue from where we left off. Get epoch and date from the file name
# A sample model path might look like:
- # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5
- regex = r".*/[\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})/mask\_rcnn\_[\w-]+(\d{4})\.h5"
+ # \path\to\logs\coco20171029T2315\mask_rcnn_coco_0001.h5 (Windows)
+ # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 (Linux)
+ regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5"
m = re.match(regex, model_path)
if m:
now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)),
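A quick check (illustrative paths, not from this commit) that the
widened [/\\] separator class matches both styles:

    import re

    regex = (r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})"
             r"[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5")
    for path in (r"C:\path\to\logs\coco20171029T2315"
                 r"\mask_rcnn_coco_0001.h5",
                 "/path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5"):
        m = re.match(regex, path)
        print(bool(m), m.group(6) if m else None)  # True 0001, twice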
@@ -2290,10 +2283,6 @@
self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format(
self.config.NAME.lower(), now))
- # Create log_dir if not exists
- if not os.path.exists(self.log_dir):
- os.makedirs(self.log_dir)
-
# Path to save after each epoch. Include placeholders that get filled by Keras.
self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.h5".format(
self.config.NAME.lower()))
@@ -2358,6 +2347,10 @@
val_generator = data_generator(val_dataset, self.config, shuffle=True,
batch_size=self.config.BATCH_SIZE)
+ # Create log_dir if it does not exist
+ if not os.path.exists(self.log_dir):
+ os.makedirs(self.log_dir)
+
# Callbacks
callbacks = [
keras.callbacks.TensorBoard(log_dir=self.log_dir,
Modified: grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py
===================================================================
--- grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py 2018-10-07 21:39:59 UTC (rev 73495)
+++ grass-addons/grass7/imagery/i.ann.maskrcnn/maskrcnnlib/utils.py 2018-10-08 09:24:30 UTC (rev 73496)
@@ -24,6 +24,7 @@
import urllib.request
import glob
import warnings
+from distutils.version import LooseVersion
# URL from which to download the latest COCO trained weights
# COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
@@ -332,17 +333,6 @@
assert info['source'] == source
return info['id']
- def append_data(self, class_info, image_info):
- self.external_to_class_id = {}
- for i, c in enumerate(self.class_info):
- for ds, id in c["map"]:
- self.external_to_class_id[ds + str(id)] = i
-
- # Map external image IDs to internal ones.
- self.external_to_image_id = {}
- for i, info in enumerate(self.image_info):
- self.external_to_image_id[info["ds"] + str(info["id"])] = i
-
@property
def image_ids(self):
return self._image_ids
@@ -496,9 +486,8 @@
# Resize image using bilinear interpolation
if scale != 1:
- image = skimage.transform.resize(
- image, (round(h * scale), round(w * scale)),
- order=1, mode="constant", preserve_range=True)
+ image = resize(image, (round(h * scale), round(w * scale)),
+ preserve_range=True)
# Need padding or cropping?
if mode == "square":
@@ -582,7 +571,7 @@
if m.size == 0:
raise Exception("Invalid bounding box with area of zero")
# Resize with bilinear interpolation
- m = skimage.transform.resize(m, mini_shape, order=1, mode="constant")
+ m = resize(m, mini_shape)
mini_mask[:, :, i] = np.around(m).astype(np.bool)
return mini_mask
@@ -600,7 +589,7 @@
h = y2 - y1
w = x2 - x1
# Resize with bilinear interpolation
- m = skimage.transform.resize(m, (h, w), order=1, mode="constant")
+ m = resize(m, (h, w))
mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool)
return mask
@@ -620,7 +609,7 @@
"""
threshold = 0.5
y1, x1, y2, x2 = bbox
- mask = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1, mode="constant")
+ mask = resize(mask, (y2 - y1, x2 - x1))
mask = np.where(mask >= threshold, 1, 0).astype(np.bool)
# Put the mask in the right location.
@@ -921,3 +910,27 @@
scale = np.array([h - 1, w - 1, h - 1, w - 1])
shift = np.array([0, 0, 1, 1])
return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
+
+
+def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
+ preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
+ """A wrapper for Scikit-Image resize().
+
+ Scikit-Image generates warnings on every call to resize() if it doesn't
+ receive the right parameters. The right parameters depend on the version
+ of skimage. This solves the problem by using different parameters per
+ version. And it provides a central place to control resizing defaults.
+ """
+ if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
+ # New in 0.14: anti_aliasing. Default it to False for backward
+ # compatibility with skimage 0.13.
+ return skimage.transform.resize(
+ image, output_shape,
+ order=order, mode=mode, cval=cval, clip=clip,
+ preserve_range=preserve_range, anti_aliasing=anti_aliasing,
+ anti_aliasing_sigma=anti_aliasing_sigma)
+ else:
+ return skimage.transform.resize(
+ image, output_shape,
+ order=order, mode=mode, cval=cval, clip=clip,
+ preserve_range=preserve_range)
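With the wrapper in place, call sites reduce to the short form used
throughout this diff; a usage sketch on toy arrays:

    import numpy as np

    mask = np.random.rand(56, 56)
    small = resize(mask, (28, 28))  # bilinear, mode="constant" defaults
    image = np.random.rand(100, 100, 3)
    scaled = resize(image, (200, 200), preserve_range=True)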