diff --git a/sml/metrics/classification/classification_emul.py b/sml/metrics/classification/classification_emul.py
index 4da6a3a6..6f698d1c 100644
--- a/sml/metrics/classification/classification_emul.py
+++ b/sml/metrics/classification/classification_emul.py
@@ -34,101 +34,92 @@
 # TODO: design the enumation framework, just like py.unittest
 # all emulation action should begin with `emul_` (for reflection)
 def emul_auc(mode: emulation.Mode.MULTIPROCESS):
-    try:
-        # bandwidth and latency only work for docker mode
-        emulator = emulation.Emulator(
-            emulation.CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20
-        )
-        emulator.up()
-
-        # Create dataset
-        row = 10000
-        y_true = np.random.randint(0, 2, (row,))
-        y_pred = np.random.random((row,))
-
-        # Run
-        result = emulator.run(roc_auc_score)(
-            y_true, y_pred
-        )  # X, y should be two-dimension array
-        print(result)
+    # Create dataset
+    row = 10000
+    y_true = np.random.randint(0, 2, (row,))
+    y_pred = np.random.random((row,))
 
-    finally:
-        emulator.down()
+    # Run
+    result = emulator.run(roc_auc_score)(
+        y_true, y_pred
+    )  # y_true and y_pred are 1-D arrays of length `row`
+    print(result)
 
 
 def emul_Classification(mode: emulation.Mode.MULTIPROCESS):
-    try:
-        # bandwidth and latency only work for docker mode
-        emulator = emulation.Emulator(
-            emulation.CLUSTER_ABY3_3PC, mode, bandwidth=300, latency=20
+    def proc(
+        y_true, y_pred, average='binary', labels=None, pos_label=1, transform=1
+    ):
+        f1 = f1_score(
+            y_true,
+            y_pred,
+            average=average,
+            labels=labels,
+            pos_label=pos_label,
+            transform=transform,
         )
-        emulator.up()
+        precision = precision_score(
+            y_true,
+            y_pred,
+            average=average,
+            labels=labels,
+            pos_label=pos_label,
+            transform=transform,
+        )
+        recall = recall_score(
+            y_true,
+            y_pred,
+            average=average,
+            labels=labels,
+            pos_label=pos_label,
+            transform=transform,
+        )
+        accuracy = accuracy_score(y_true, y_pred)
+        return f1, precision, recall, accuracy
 
-        def proc(
-            y_true, y_pred, average='binary', labels=None, pos_label=1, transform=1
-        ):
-            f1 = f1_score(
-                y_true,
-                y_pred,
-                average=average,
-                labels=labels,
-                pos_label=pos_label,
-                transform=transform,
-            )
-            precision = precision_score(
-                y_true,
-                y_pred,
-                average=average,
-                labels=labels,
-                pos_label=pos_label,
-                transform=transform,
-            )
-            recall = recall_score(
-                y_true,
-                y_pred,
-                average=average,
-                labels=labels,
-                pos_label=pos_label,
-                transform=transform,
-            )
-            accuracy = accuracy_score(y_true, y_pred)
-            return f1, precision, recall, accuracy
+    def sklearn_proc(y_true, y_pred, average='binary', labels=None, pos_label=1):
+        f1 = metrics.f1_score(
+            y_true, y_pred, average=average, labels=labels, pos_label=pos_label
+        )
+        precision = metrics.precision_score(
+            y_true, y_pred, average=average, labels=labels, pos_label=pos_label
+        )
+        recall = metrics.recall_score(
+            y_true, y_pred, average=average, labels=labels, pos_label=pos_label
+        )
+        accuracy = metrics.accuracy_score(y_true, y_pred)
+        return f1, precision, recall, accuracy
 
-        def sklearn_proc(y_true, y_pred, average='binary', labels=None, pos_label=1):
-            f1 = metrics.f1_score(
-                y_true, y_pred, average=average, labels=labels, pos_label=pos_label
-            )
-            precision = metrics.precision_score(
-                y_true, y_pred, average=average, labels=labels, pos_label=pos_label
-            )
-            recall = metrics.recall_score(
-                y_true, y_pred, average=average, labels=labels, pos_label=pos_label
-            )
-            accuracy = metrics.accuracy_score(y_true, y_pred)
-            return f1, precision, recall, accuracy
+    def check(spu_result, sk_result):
+        for pair in zip(spu_result, sk_result):
+            np.testing.assert_allclose(pair[0], pair[1], rtol=1, atol=1e-5)
 
-        def check(spu_result, sk_result):
-            for pair in zip(spu_result, sk_result):
-                np.testing.assert_allclose(pair[0], pair[1], rtol=1, atol=1e-5)
+    # Test binary
+    y_true = jnp.array([0, 1, 1, 0, 1, 1])
+    y_pred = jnp.array([0, 0, 1, 0, 1, 1])
+    spu_result = emulator.run(proc)(y_true, y_pred, average=None, labels=[0, 1, 2])
+    sk_result = sklearn_proc(y_true, y_pred, average=None, labels=[0, 1, 2])
+    check(spu_result, sk_result)
 
-        # Test binary
-        y_true = jnp.array([0, 1, 1, 0, 1, 1])
-        y_pred = jnp.array([0, 0, 1, 0, 1, 1])
-        spu_result = emulator.run(proc)(y_true, y_pred, average=None, labels=[0, 1, 2])
-        sk_result = sklearn_proc(y_true, y_pred, average=None, labels=[0, 1, 2])
-        check(spu_result, sk_result)
+    # Test multiclass
+    y_true = jnp.array([0, 1, 1, 0, 2, 1])
+    y_pred = jnp.array([0, 0, 1, 0, 2, 1])
+    spu_result = emulator.run(proc)(y_true, y_pred, average=None, labels=[0, 1, 2])
+    sk_result = sklearn_proc(y_true, y_pred, average=None, labels=[0, 1, 2])
+    check(spu_result, sk_result)
 
-        # Test multiclass
-        y_true = jnp.array([0, 1, 1, 0, 2, 1])
-        y_pred = jnp.array([0, 0, 1, 0, 2, 1])
-        spu_result = emulator.run(proc)(y_true, y_pred, average=None, labels=[0, 1, 2])
-        sk_result = sklearn_proc(y_true, y_pred, average=None, labels=[0, 1, 2])
-        check(spu_result, sk_result)
 
+if __name__ == "__main__":
+    # bandwidth and latency only work for docker mode
+    emulator = emulation.Emulator(
+        emulation.CLUSTER_ABY3_3PC,
+        emulation.Mode.MULTIPROCESS,
+        bandwidth=300,
+        latency=20,
+    )
+    try:  # emulator is bound before `try`, so `finally` cannot hit a NameError
+        emulator.up()
+        emul_auc(emulation.Mode.MULTIPROCESS)
+        emul_Classification(emulation.Mode.MULTIPROCESS)
     finally:
         emulator.down()
-
-
-if __name__ == "__main__":
-    emul_auc(emulation.Mode.MULTIPROCESS)
-    emul_Classification(emulation.Mode.MULTIPROCESS)