diff --git a/keras_fsl/losses/yolo_loss.py b/keras_fsl/losses/yolo_loss.py
index 3c363e7..9c89f06 100644
--- a/keras_fsl/losses/yolo_loss.py
+++ b/keras_fsl/losses/yolo_loss.py
@@ -9,51 +9,51 @@ def yolo_loss(anchors, threshold):
         threshold:
     """
+
+    @tf.function
     def _yolo_loss(y_true, y_pred):
         """
         y_true and y_pred are (batch_size, number of boxes, 4 (+ 1) + number of classes (+ anchor_id for y_pred)). The
         number of boxes is determined by the network architecture as in single-shot detection one can only predict
         grid_width x grid_height boxes per anchor.
         """
-        # 1. Find matching anchors: the anchor with the best IoU is chosen for predicting each true box
-        y_true_broadcast = tf.expand_dims(y_true, axis=2)
-        y_true_broadcast.shape
-        y_true_broadcast[..., 2:4].shape
-
-        anchors_tensor = tf.broadcast_to(anchors[['height', 'width']].values, [1, 1, len(anchors), 2])
-        anchors_tensor.shape
-
-        height_width_min = tf.minimum(y_true_broadcast[..., 2:4], anchors_tensor)
-        height_width_max = tf.maximum(y_true_broadcast[..., 2:4], anchors_tensor)
-        height_width_min.shape
-        height_width_max.shape
-        intersection = tf.reduce_prod(height_width_min, axis=-1)
-        intersection.shape
-        true_box_area = tf.reduce_prod(y_true_broadcast[..., 2:4], axis=-1)
-        true_box_area.shape
-        anchor_boxes_area = tf.reduce_prod(anchors_tensor, axis=-1)
-        anchor_boxes_area.shape
-        union = true_box_area + anchor_boxes_area - intersection
-        union.shape
-        iou = intersection / union
-        iou.shape
-        best_anchor = tf.math.argmax(iou, axis=-1)
-        best_anchor.shape
-        best_anchor[0, 0]
-
-        batch_size, boxes, _ = tf.shape(y_true)
-        # 2. Find grid cell: for each selected anchor, select the prediction coming from the cell which contains the true box center
-        for image in range(batch_size):
-            for box in range(boxes):
-                true_box_info = y_true[image, box]
-                selected_anchor = tf.cast(best_anchor[image, box], y_pred.dtype)
-                prediction_for_anchor = tf.boolean_mask(y_pred[image], y_pred[image, :, -1] == selected_anchor, axis=0)
-                prediction_for_anchor.shape
-                grid_size = prediction_for_anchor
-        y_pred[..., -1].shape == best_anchor
-        y_pred.shape
-
-        # 3. For confidence loss: for each selected anchor, compute confidence loss for boxes with IoU < threshold
-        non_empty_boxes_mask = tf.cast(tf.math.reduce_prod(y_true[..., 2:4], axis=-1) > 0, tf.bool)
-        pass
+        anchor_sizes = tf.constant(anchors[['height', 'width']].values, dtype=y_pred.dtype)
+        loss_coordinates = tf.constant(0.0)
+        loss_box = tf.constant(0.0)
+        loss_objectness = tf.constant(0.0)
+        loss_classes = tf.constant(0.0)
+
+        for image_index in tf.range(tf.shape(y_true)[0]):
+            image, pred = y_true[image_index], y_pred[image_index]
+            # start by penalizing every prediction as a no-object box; matched anchors are corrected below
+            loss_objectness += tf.math.reduce_sum(
+                tf.keras.backend.binary_crossentropy(tf.zeros_like(pred[..., 4]), pred[..., 4])
+            )
+            for box_index in tf.range(tf.shape(image)[0]):
+                box = image[box_index]
+                if box[4] < 1:
+                    continue
+                # IoU between the true box and each anchor prior, both centered on the origin
+                intersection = tf.reduce_prod(tf.minimum(box[2:4], anchor_sizes), axis=-1)
+                union = tf.reduce_prod(box[2:4]) + tf.reduce_prod(anchor_sizes, axis=-1) - intersection
+                iou = intersection / union
+                best_iou = tf.reduce_max(iou)
+                for anchor_index in tf.range(len(anchors)):
+                    iou_ = iou[anchor_index]
+                    if iou_ < threshold:
+                        continue
+                    selected_anchor_map = tf.boolean_mask(pred, pred[..., -1] == tf.cast(anchor_index, pred.dtype))
+                    # select the prediction coming from the cell whose center is closest to the true box center
+                    selected_cell = tf.argmin(tf.norm(box[:2] - selected_anchor_map[..., :2], axis=1))
+                    selected_pred = selected_anchor_map[selected_cell]
+                    # anchors above the IoU threshold are not penalized as no-object boxes
+                    loss_objectness -= tf.keras.backend.binary_crossentropy(0.0, selected_pred[4])
+
+                    if iou_ == best_iou:
+                        loss_objectness += tf.keras.backend.binary_crossentropy(box[4], selected_pred[4])
+                        loss_coordinates += tf.norm(box[:2] - selected_pred[:2], ord=2)
+                        loss_box += tf.norm(box[2:4] - selected_pred[2:4], ord=2)
+                        loss_classes += tf.reduce_sum(tf.keras.backend.binary_crossentropy(box[5:], selected_pred[5:-1]))
+
+        return loss_coordinates + loss_box + loss_objectness + loss_classes
+
     return _yolo_loss
diff --git a/keras_fsl/models/activations/yolo_box.py b/keras_fsl/models/activations/yolo_box.py
index 8634932..fa76d5b 100644
--- a/keras_fsl/models/activations/yolo_box.py
+++ b/keras_fsl/models/activations/yolo_box.py
@@ -1,5 +1,5 @@
 """
-Activation function for mapping feature into output coordinates as in Yolo V3
+Activation function for mapping features into output box dimensions as in Yolo V3
 """
 import tensorflow as tf
 from tensorflow.keras.models import Sequential
diff --git a/keras_fsl/models/feature_pyramid_net.py b/keras_fsl/models/feature_pyramid_net.py
index 1029111..c5d32f4 100644
--- a/keras_fsl/models/feature_pyramid_net.py
+++ b/keras_fsl/models/feature_pyramid_net.py
@@ -127,7 +127,9 @@ def FeaturePyramidNet(
         for anchor in anchors.itertuples()
     ]
     outputs = Concatenate(axis=1)([
-        Lambda(lambda output: tf.concat([output, tf.expand_dims(tf.ones(tf.shape(output)[:2], dtype=output.dtype) * index, -1)], axis=-1))(outputs[index])
+        Lambda(lambda output, index_=index: (
+            tf.concat([output, tf.expand_dims(tf.ones(tf.shape(output)[:2], dtype=output.dtype) * index_, -1)], axis=-1)
+        ))(outputs[index])
         for index, anchor in anchors.iterrows()
     ])
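
As a quick smoke test of the new loss (not part of the diff; the two anchor sizes, the number of boxes per anchor and the class count below are made up for illustration), dummy tensors following the layout described in the _yolo_loss docstring can be fed through it:

    import pandas as pd
    import tensorflow as tf

    from keras_fsl.losses.yolo_loss import yolo_loss

    # two anchor priors; the loss reads their sizes from the height/width columns
    anchors = pd.DataFrame({'height': [0.2, 0.5], 'width': [0.3, 0.4]})
    loss = yolo_loss(anchors, threshold=0.5)

    batch_size, boxes_per_anchor, n_classes = 2, 4, 3
    # y_true boxes are (center coordinates, height, width, objectness, one-hot classes); all empty here
    y_true = tf.zeros([batch_size, 2 * boxes_per_anchor, 4 + 1 + n_classes])
    # y_pred carries one extra trailing channel with the anchor id of each box,
    # as appended by FeaturePyramidNet
    anchor_ids = tf.tile(tf.repeat([[0.0], [1.0]], boxes_per_anchor, axis=0)[None], [batch_size, 1, 1])
    y_pred = tf.concat(
        [tf.random.uniform([batch_size, 2 * boxes_per_anchor, 4 + 1 + n_classes]), anchor_ids],
        axis=-1,
    )
    # with no true box, only the no-object objectness term should contribute
    print(loss(y_true, y_pred).numpy())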