diff --git a/keras_fsl/losses/yolo_loss.py b/keras_fsl/losses/yolo_loss.py
index 3c363e7..9c89f06 100644
--- a/keras_fsl/losses/yolo_loss.py
+++ b/keras_fsl/losses/yolo_loss.py
@@ -9,51 +9,51 @@ def yolo_loss(anchors, threshold):
         threshold:
     """
+
+    @tf.function
     def _yolo_loss(y_true, y_pred):
         """
         y_true and y_pred are (batch_size, number of boxes, 4 (+ 1) + number of classes (+ anchor_id for y_pred)). The
         number of boxes is determined by the network architecture as in single-shot detection one can only predict
         grid_width x grid_height boxes per anchor.
         """
-        # 1. Find matching anchors: the anchor with the best IoU is chosen for predicting each true box
-        y_true_broadcast = tf.expand_dims(y_true, axis=2)
-        y_true_broadcast.shape
-        y_true_broadcast[..., 2:4].shape
-
-        anchors_tensor = tf.broadcast_to(anchors[['height', 'width']].values, [1, 1, len(anchors), 2])
-        anchors_tensor.shape
-
-        height_width_min = tf.minimum(y_true_broadcast[..., 2:4], anchors_tensor)
-        height_width_max = tf.maximum(y_true_broadcast[..., 2:4], anchors_tensor)
-        height_width_min.shape
-        height_width_max.shape
-        intersection = tf.reduce_prod(height_width_min, axis=-1)
-        intersection.shape
-        true_box_area = tf.reduce_prod(y_true_broadcast[..., 2:4], axis=-1)
-        true_box_area.shape
-        anchor_boxes_area = tf.reduce_prod(anchors_tensor, axis=-1)
-        anchor_boxes_area.shape
-        union = true_box_area + anchor_boxes_area - intersection
-        union.shape
-        iou = intersection / union
-        iou.shape
-        best_anchor = tf.math.argmax(iou, axis=-1)
-        best_anchor.shape
-        best_anchor[0, 0]
-
-        batch_size, boxes, _ = tf.shape(y_true)
-        # 2. Find grid cell: for each selected anchor, select the prediction coming from the cell which contains the true box center
-        for image in range(batch_size):
-            for box in range(boxes):
-                true_box_info = y_true[image, box]
-                selected_anchor = tf.cast(best_anchor[image, box], y_pred.dtype)
-                prediction_for_anchor = tf.boolean_mask(y_pred[image], y_pred[image, :, -1] == selected_anchor, axis=0)
-                prediction_for_anchor.shape
-                grid_size = prediction_for_anchor
-        y_pred[..., -1].shape == best_anchor
-        y_pred.shape
-
-        # 3. For confidence loss: for each selected anchor, compute confidence loss for boxes with IoU < threshold
-        non_empty_boxes_mask = tf.cast(tf.math.reduce_prod(y_true[..., 2:4], axis=-1) > 0, tf.bool)
-        pass
+        anchor_sizes = tf.constant(anchors[['height', 'width']].values, dtype=y_pred.dtype)
+        loss_coordinates = tf.constant(0.0)
+        loss_box = tf.constant(0.0)
+        loss_objectness = tf.constant(0.0)
+        loss_classes = tf.constant(0.0)
+
+        for image_index in tf.range(tf.shape(y_true)[0]):
+            image, pred = y_true[image_index], y_pred[image_index]
+            # start by penalizing every prediction as a no-object box; matched anchors are corrected below
+            loss_objectness += tf.math.reduce_sum(
+                tf.keras.backend.binary_crossentropy(tf.zeros_like(pred[..., 4]), pred[..., 4])
+            )
+            for box_index in tf.range(tf.shape(image)[0]):
+                box = image[box_index]
+                if box[4] < 1:
+                    continue
+                # IoU between the true box and each anchor prior, both centered on the origin
+                intersection = tf.reduce_prod(tf.minimum(box[2:4], anchor_sizes), axis=-1)
+                union = tf.reduce_prod(box[2:4]) + tf.reduce_prod(anchor_sizes, axis=-1) - intersection
+                iou = intersection / union
+                best_iou = tf.reduce_max(iou)
+                for anchor_index in tf.range(len(anchors)):
+                    iou_ = iou[anchor_index]
+                    if iou_ < threshold:
+                        continue
+                    selected_anchor_map = tf.boolean_mask(pred, pred[..., -1] == tf.cast(anchor_index, pred.dtype))
+                    # select the prediction coming from the cell whose center is closest to the true box center
+                    selected_cell = tf.argmin(tf.norm(box[:2] - selected_anchor_map[..., :2], axis=1))
+                    selected_pred = selected_anchor_map[selected_cell]
+                    # anchors above the IoU threshold are not penalized as no-object boxes
+                    loss_objectness -= tf.keras.backend.binary_crossentropy(0.0, selected_pred[4])
+
+                    if iou_ == best_iou:
+                        loss_objectness += tf.keras.backend.binary_crossentropy(box[4], selected_pred[4])
+                        loss_coordinates += tf.norm(box[:2] - selected_pred[:2], ord=2)
+                        loss_box += tf.norm(box[2:4] - selected_pred[2:4], ord=2)
+                        loss_classes += tf.reduce_sum(tf.keras.backend.binary_crossentropy(box[5:], selected_pred[5:-1]))
+
+        return loss_coordinates + loss_box + loss_objectness + loss_classes
+
     return _yolo_loss
diff --git a/keras_fsl/models/activations/yolo_box.py b/keras_fsl/models/activations/yolo_box.py
index 8634932..fa76d5b 100644
--- a/keras_fsl/models/activations/yolo_box.py
+++ b/keras_fsl/models/activations/yolo_box.py
@@ -1,5 +1,5 @@
 """
-Activation function for mapping feature into output coordinates as in Yolo V3
+Activation function for mapping features into output box dimensions as in Yolo V3
 """
 import tensorflow as tf
 from tensorflow.keras.models import Sequential
diff --git a/keras_fsl/models/feature_pyramid_net.py b/keras_fsl/models/feature_pyramid_net.py
index 1029111..c5d32f4 100644
--- a/keras_fsl/models/feature_pyramid_net.py
+++ b/keras_fsl/models/feature_pyramid_net.py
@@ -127,7 +127,9 @@ def FeaturePyramidNet(
         for anchor in anchors.itertuples()
     ]
     outputs = Concatenate(axis=1)([
-        Lambda(lambda output: tf.concat([output, tf.expand_dims(tf.ones(tf.shape(output)[:2], dtype=output.dtype) * index, -1)], axis=-1))(outputs[index])
+        Lambda(lambda output, index_=index: (
+            tf.concat([output, tf.expand_dims(tf.ones(tf.shape(output)[:2], dtype=output.dtype) * index_, -1)], axis=-1)
+        ))(outputs[index])
         for index, anchor in anchors.iterrows()
     ])
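
As a quick smoke test of the new loss (not part of the diff; the two anchor sizes, the number of boxes per anchor and the class count below are made up for illustration), dummy tensors following the layout described in the _yolo_loss docstring can be fed through it:

    import pandas as pd
    import tensorflow as tf

    from keras_fsl.losses.yolo_loss import yolo_loss

    # two anchor priors; the loss reads their sizes from the height/width columns
    anchors = pd.DataFrame({'height': [0.2, 0.5], 'width': [0.3, 0.4]})
    loss = yolo_loss(anchors, threshold=0.5)

    batch_size, boxes_per_anchor, n_classes = 2, 4, 3
    # y_true boxes are (center coordinates, height, width, objectness, one-hot classes); all empty here
    y_true = tf.zeros([batch_size, 2 * boxes_per_anchor, 4 + 1 + n_classes])
    # y_pred carries one extra trailing channel with the anchor id of each box,
    # as appended by FeaturePyramidNet
    anchor_ids = tf.tile(tf.repeat([[0.0], [1.0]], boxes_per_anchor, axis=0)[None], [batch_size, 1, 1])
    y_pred = tf.concat(
        [tf.random.uniform([batch_size, 2 * boxes_per_anchor, 4 + 1 + n_classes]), anchor_ids],
        axis=-1,
    )
    # with no true box, only the no-object objectness term should contribute
    print(loss(y_true, y_pred).numpy())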